bitkeeper revision 1.1159.1.506 (41d00f89OifvQoN_EcnO5XuQN0RRjA)
author cl349@arcadians.cl.cam.ac.uk <cl349@arcadians.cl.cam.ac.uk>
Mon, 27 Dec 2004 13:35:05 +0000 (13:35 +0000)
committer cl349@arcadians.cl.cam.ac.uk <cl349@arcadians.cl.cam.ac.uk>
Mon, 27 Dec 2004 13:35:05 +0000 (13:35 +0000)
Update to Linux 2.6.10.

27 files changed:
.rootkeys
linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig
linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c [new file with mode: 0644]
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h [new file with mode: 0644]
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h [new file with mode: 0644]
linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c [deleted file]
linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c [deleted file]
linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c [deleted file]
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c [deleted file]
linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h [deleted file]
linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h [deleted file]
linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h [deleted file]

index f0894e1d743a9a354a24f3e8979d695cde729bc0..dc3e9eacd9435383df6594a5fe8761e1e4eaa2d6 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S
 40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
 40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ioport.c
+41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
 40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ldt.c
 4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c
 40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c
 40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c
 40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
 40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c
 40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/Makefile
 40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
 412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c
 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.10-xen-sparse/arch/xen/kernel/reboot.c
 414c113396tK1HTVeUalm3u-1DF16g linux-2.6.10-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
 3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.10-xen-sparse/arch/xen/kernel/xen_proc.c
 41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.10-xen-sparse/drivers/Makefile
 4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.10-xen-sparse/drivers/char/mem.c
 40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c
 40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h
 40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
 40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.10-xen-sparse/drivers/xen/console/Makefile
 3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.10-xen-sparse/drivers/xen/console/console.c
 40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.10-xen-sparse/drivers/xen/evtchn/Makefile
 40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
 40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
 40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
 4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
 40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h
 40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/param.h
 412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h
 40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/segment.h
 40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/setup.h
+4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h
 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
 412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.10-xen-sparse/mm/memory.c
 410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-xen-sparse/mm/page_alloc.c
 41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-xen-sparse/net/core/skbuff.c
-41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
-41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
-418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
-41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile
-41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c
-41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h
-41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
-41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c
-41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c
-41a64cdeQ5SWVEVbSZ0K-IeHHhIJ_w linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h
-41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
-4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
 413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
 413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen
index ebf3df11ea8e34f0197ba6424f9b7c99d2f0e817..059e6571d2d7bb4bd44e89677544cc1911237a69 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-rc3-xen0
-# Sun Dec 26 10:34:29 2004
+# Linux kernel version: 2.6.10-xen0
+# Mon Dec 27 10:14:40 2004
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -17,8 +17,8 @@ CONFIG_XEN_BLKDEV_BACKEND=y
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-# CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+# CONFIG_XEN_BLKDEV_TAP is not set
 CONFIG_XEN_WRITABLE_PAGETABLES=y
 CONFIG_XEN_SCRUB_PAGES=y
 CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
index 56dcbfd336a7b740b5d0d14dbfd0fe40ad7677ab..db3c07f766874bbf73cfea7ebf99b7c2eaf7f22a 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-rc3-xenU
-# Sun Dec 26 10:35:15 2004
+# Linux kernel version: 2.6.10-xenU
+# Mon Dec 27 10:15:03 2004
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -13,12 +13,11 @@ CONFIG_NO_IDLE_HZ=y
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
 # CONFIG_XEN_BLKDEV_BACKEND is not set
-# CONFIG_XEN_BLKDEV_TAP_BE is not set
 # CONFIG_XEN_NETDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-# CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+# CONFIG_XEN_BLKDEV_TAP is not set
 CONFIG_XEN_WRITABLE_PAGETABLES=y
 CONFIG_XEN_SCRUB_PAGES=y
 CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
new file mode 100644 (file)
index 0000000..6cd16cc
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ *
+ * This version only reports the event: it prints the value it was
+ * given as a zero-padded hex vector number and does nothing else.
+ * It is compiled only when CONFIG_X86_LOCAL_APIC is not set.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ *
+ * The union overlays a struct thread_info on a THREAD_SIZE-byte array,
+ * so the thread_info occupies the start of the same memory that is
+ * used as the stack.
+ */
+union irq_ctx {
+       struct thread_info      tinfo;
+       u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+/*
+ * One context pointer per CPU for hard IRQs and one for softirqs.
+ * Both arrays are filled in by irq_ctx_init().
+ */
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ *
+ * @regs: register frame of the interrupted context; the IRQ number is
+ *        extracted from regs->orig_eax.
+ *
+ * The dispatch is bracketed by irq_enter()/irq_exit(), the IRQ is
+ * handed to __do_IRQ(), and the function always returns 1.  With
+ * CONFIG_4KSTACKS the handler runs on this CPU's hardirq_ctx stack
+ * unless we are already executing on it.
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{      
+       /* high bits used in ret_from_ code */
+       int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
+#ifdef CONFIG_4KSTACKS
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+#endif
+
+       irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+       {
+               long esp;
+
+               /* esp = %esp & (THREAD_SIZE - 1) */
+               __asm__ __volatile__("andl %%esp,%0" :
+                                       "=r" (esp) : "0" (THREAD_SIZE - 1));
+               if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+                       printk("do_IRQ: stack overflow: %ld\n",
+                               esp - sizeof(struct thread_info));
+                       dump_stack();
+               }
+       }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+       curctx = (union irq_ctx *) current_thread_info();
+       irqctx = hardirq_ctx[smp_processor_id()];
+
+       /*
+        * this is where we switch to the IRQ stack. However, if we are
+        * already using the IRQ stack (because we interrupted a hardirq
+        * handler) we can't do that and just have to keep using the
+        * current stack (which is the irq stack already after all)
+        */
+       if (curctx != irqctx) {
+               int arg1, arg2, ebx;
+
+               /* build the stack frame on the IRQ stack */
+               /* isp points just past the end of the IRQ context */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+               irqctx->tinfo.task = curctx->tinfo.task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /*
+                * %eax and %edx carry irq and regs into __do_IRQ and
+                * %ebx starts out holding isp.  The xchgl installs isp
+                * as the stack pointer and leaves the old stack pointer
+                * in %ebx; the movl puts it back after the call (this
+                * relies on __do_IRQ leaving %ebx intact).
+                */
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp      \n"
+                       "       call    __do_IRQ         \n"
+                       "       movl   %%ebx,%%esp      \n"
+                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+                       :  "0" (irq),   "1" (regs),  "2" (isp)
+                       : "memory", "cc", "ecx"
+               );
+       } else
+#endif
+               __do_IRQ(irq, regs);
+
+       irq_exit();
+
+       return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * Backing storage for the per-CPU IRQ contexts: irq_ctx_init() gives
+ * CPU n the THREAD_SIZE bytes that start at offset n*THREAD_SIZE of
+ * each array.  Both arrays are aligned to THREAD_SIZE.
+ *
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ *
+ * @cpu: index of the CPU whose contexts are set up.
+ *
+ * Does nothing if hardirq_ctx[cpu] is already set, so repeated calls
+ * for the same CPU are harmless.  Otherwise it carves this CPU's slice
+ * out of hardirq_stack and softirq_stack, initialises the thread_info
+ * at the start of each slice, publishes both pointers and logs their
+ * addresses.
+ */
+void irq_ctx_init(int cpu)
+{
+       union irq_ctx *irqctx;
+
+       if (hardirq_ctx[cpu])
+               return;
+
+       /* hard IRQ context: preempt_count starts at HARDIRQ_OFFSET */
+       irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       hardirq_ctx[cpu] = irqctx;
+
+       /* softirq context: same layout, preempt_count starts at SOFTIRQ_OFFSET */
+       irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       softirq_ctx[cpu] = irqctx;
+
+       printk("CPU %u irqstacks, hard=%p soft=%p\n",
+               cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+/* Called from the inline asm below, after the stack switch. */
+extern asmlinkage void __do_softirq(void);
+
+/*
+ * Run pending softirqs on this CPU's softirq_ctx stack.
+ *
+ * Returns immediately when in_interrupt() is true.  Otherwise local
+ * interrupts are disabled (flags saved), and if any softirq is pending
+ * the stack pointer is switched to the end of the softirq context for
+ * the duration of the __do_softirq() call.  The saved flags are
+ * restored before returning.
+ */
+asmlinkage void do_softirq(void)
+{
+       unsigned long flags;
+       struct thread_info *curctx;
+       union irq_ctx *irqctx;
+       u32 *isp;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       if (local_softirq_pending()) {
+               curctx = current_thread_info();
+               irqctx = softirq_ctx[smp_processor_id()];
+               irqctx->tinfo.task = curctx->task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /* build the stack frame on the softirq stack */
+               /* isp points just past the end of the softirq context */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+               /*
+                * %ebx starts out holding isp.  The xchgl installs it as
+                * the stack pointer and leaves the old stack pointer in
+                * %ebx; the movl puts it back after the call (this
+                * relies on __do_softirq leaving %ebx intact).
+                */
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp     \n"
+                       "       call    __do_softirq    \n"
+                       "       movl    %%ebx,%%esp     \n"
+                       : "=b"(isp)
+                       : "0"(isp)
+                       : "memory", "cc", "edx", "ecx", "eax"
+               );
+       }
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+/* Reported on the "ERR:" line printed by show_interrupts(). */
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ *
+ * @p: seq_file that receives the output.
+ * @v: points to a loff_t holding the position i to print.
+ *
+ * i == 0        prints the header row of online CPUs first (and then,
+ *               like every other i < NR_IRQS, the line for IRQ i).
+ * i <  NR_IRQS  prints one line for IRQ i, or nothing if the IRQ has
+ *               no action installed.
+ * i == NR_IRQS  prints the summary lines (NMI, and depending on the
+ *               configuration LOC and MIS, plus ERR).
+ *
+ * Always returns 0.
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int i = *(loff_t *) v, j;
+       struct irqaction * action;
+       unsigned long flags;
+
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for (j=0; j<NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "CPU%d       ",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               /* the descriptor lock is held while the action list is walked */
+               spin_lock_irqsave(&irq_desc[i].lock, flags);
+               action = irq_desc[i].action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               /* one count column per online CPU */
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+               seq_printf(p, " %14s", irq_desc[i].handler->typename);
+               seq_printf(p, "  %s", action->name);
+
+               /* remaining actions chained on this IRQ, comma separated */
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
+               seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", nmi_count(j));
+               seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+               seq_printf(p, "LOC: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ",
+                                       irq_stat[j].apic_timer_irqs);
+               seq_putc(p, '\n');
+#endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+       }
+       return 0;
+}
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
new file mode 100644 (file)
index 0000000..9fabbfe
--- /dev/null
@@ -0,0 +1,599 @@
+/*
+ *     Intel SMP support routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#if 0
+#include <mach_apic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+/*
+ *     Some notes on x86 processor bugs affecting SMP operation:
+ *
+ *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ *     The Linux implications for SMP are handled as follows:
+ *
+ *     Pentium III / [Xeon]
+ *             None of the E1AP-E3AP errata are visible to the user.
+ *
+ *     E1AP.   see PII A1AP
+ *     E2AP.   see PII A2AP
+ *     E3AP.   see PII A3AP
+ *
+ *     Pentium II / [Xeon]
+ *             None of the A1AP-A3AP errata are visible to the user.
+ *
+ *     A1AP.   see PPro 1AP
+ *     A2AP.   see PPro 2AP
+ *     A3AP.   see PPro 7AP
+ *
+ *     Pentium Pro
+ *             None of 1AP-9AP errata are visible to the normal user,
+ *     except occasional delivery of 'spurious interrupt' as trap #15.
+ *     This is very rare and a non-problem.
+ *
+ *     1AP.    Linux maps APIC as non-cacheable
+ *     2AP.    worked around in hardware
+ *     3AP.    fixed in C0 and above steppings microcode update.
+ *             Linux does not use excessive STARTUP_IPIs.
+ *     4AP.    worked around in hardware
+ *     5AP.    symmetric IO mode (normal Linux operation) not affected.
+ *             'noapic' mode has vector 0xf filled out properly.
+ *     6AP.    'noapic' mode might be affected - fixed in later steppings
+ *     7AP.    We do not assume writes to the LVT deasserting IRQs
+ *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
+ *     9AP.    We do not use mixed mode
+ *
+ *     Pentium
+ *             There is a marginal case where REP MOVS on 100MHz SMP
+ *     machines with B stepping processors can fail. XXX should provide
+ *     an L1cache=Writethrough or L1cache=off option.
+ *
+ *             B stepping CPUs may hang. There are hardware work arounds
+ *     for this. We warn about it in case your board doesn't have the work
+ *     arounds. Basically that's so I can tell anyone with a B stepping
+ *     CPU and SMP problems "tough".
+ *
+ *     Specific items [From Pentium Processor Specification Update]
+ *
+ *     1AP.    Linux doesn't use remote read
+ *     2AP.    Linux doesn't trust APIC errors
+ *     3AP.    We work around this
+ *     4AP.    Linux never generated 3 interrupts of the same priority
+ *             to cause a lost local interrupt.
+ *     5AP.    Remote read is never used
+ *     6AP.    not affected - worked around in hardware
+ *     7AP.    not affected - worked around in hardware
+ *     8AP.    worked around in hardware - we get explicit CS errors if not
+ *     9AP.    only 'noapic' mode affected. Might generate spurious
+ *             interrupts, we log only the first one and count the
+ *             rest silently.
+ *     10AP.   not affected - worked around in hardware
+ *     11AP.   Linux reads the APIC between writes to avoid this, as per
+ *             the documentation. Make sure you preserve this as it affects
+ *             the C stepping chips too.
+ *     12AP.   not affected - worked around in hardware
+ *     13AP.   not affected - worked around in hardware
+ *     14AP.   we always deassert INIT during bootup
+ *     15AP.   not affected - worked around in hardware
+ *     16AP.   not affected - worked around in hardware
+ *     17AP.   not affected - worked around in hardware
+ *     18AP.   not affected - worked around in hardware
+ *     19AP.   not affected - worked around in BIOS
+ *
+ *     If this sounds worrying believe me these bugs are either ___RARE___,
+ *     or are signal timing bugs worked around in hardware and there's
+ *     about nothing of note with C stepping upwards.
+ */
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+/*
+ * Return the OR of APIC_DM_FIXED, APIC_DEST_LOGICAL, the given shortcut
+ * bits and the vector number.
+ * NOTE(review): nothing in the visible part of this file calls this
+ * helper; confirm whether it is still needed in the Xen port.
+ */
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+/*
+ * Return the destination mask wrapped by SET_APIC_DEST_FIELD().
+ * NOTE(review): nothing in the visible part of this file calls this
+ * helper; confirm whether it is still needed in the Xen port.
+ */
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+       return SET_APIC_DEST_FIELD(mask);
+}
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+/*
+ * Deliver IPI 'vector' to 'cpu' by notifying the event channel that the
+ * per-CPU ipi_to_evtchn table records for that cpu/vector pair.
+ *
+ * A table entry of 0 is treated as "no channel bound": nothing is sent
+ * and a diagnostic naming the cpu/vector pair is logged instead.
+ */
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+       unsigned int evtchn;
+
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
+       if (evtchn) {
+#if 0
+               /* disabled: spin while the channel is still pending or masked */
+               shared_info_t *s = HYPERVISOR_shared_info;
+               while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
+                      synch_test_bit(evtchn, &s->evtchn_mask[0]))
+                       ;
+#endif
+               notify_via_evtchn(evtchn);
+       } else
+               /* terminate the line so the next log message starts cleanly */
+               printk("send_IPI to unbound port %d/%d\n",
+                      cpu, vector);
+}
+
+/*
+ * Send IPI 'vector' to the set of CPUs named by an APIC-style shortcut.
+ *
+ * APIC_DEST_SELF    sends to the current CPU only.
+ * APIC_DEST_ALLBUT  sends to every CPU set in cpu_online_map except the
+ *                   current one.
+ * Any other shortcut value sends nothing and is only logged.
+ */
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+       int cpu;
+
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default:
+               /* unsupported shortcut: report it, send nothing */
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+}
+
+/* Send IPI 'vector' to the calling CPU via the APIC_DEST_SELF shortcut. */
+void fastcall send_IPI_self(int vector)
+{
+       __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is only used on smaller machines.
+ *
+ * Send IPI 'vector' to every CPU whose bit is set in 'mask', one CPU at
+ * a time.  Local interrupts are disabled for the duration of the loop
+ * and the previous interrupt state is restored afterwards.
+ */
+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+{
+       unsigned long flags;
+       unsigned int cpu;
+
+       local_irq_save(flags);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, mask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
+
+       local_irq_restore(flags);
+}
+
+/*
+ * Same behaviour as send_IPI_mask_bitmask(): the call is forwarded
+ * unchanged.
+ */
+inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+
+       send_IPI_mask_bitmask(mask, vector);
+}
+
+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+
+/*
+ *     Smarter SMP flushing macros. 
+ *             c/o Linus Torvalds.
+ *
+ *     These mean you can really definitely utterly forget about
+ *     writing to user space from interrupts. (It's not allowed anyway).
+ *
+ *     Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+/*
+ * Description of the TLB flush request currently in flight.  The
+ * fields are written by flush_tlb_others() while it holds tlbstate_lock
+ * and read by smp_invalidate_interrupt() on the target CPUs.
+ */
+static cpumask_t flush_cpumask;                /* CPUs that have not yet handled the request */
+static struct mm_struct * flush_mm;    /* mm being flushed */
+static unsigned long flush_va;         /* address to flush, or FLUSH_ALL */
+static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
+/* flush_va value meaning "flush the whole TLB" rather than one address */
+#define FLUSH_ALL      0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ *
+ * @cpu: CPU whose lazily held mm is dropped.  Its bit is cleared in
+ * the cpu_vm_mask of that CPU's cpu_tlbstate.active_mm and cr3 is
+ * loaded with swapper_pg_dir.  Calling this while the CPU's tlb state
+ * is TLBSTATE_OK is a BUG().
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+               BUG();
+       cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+       load_cr3(swapper_pg_dir);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ *     Stop ipi delivery for the old mm. This is not synchronized with
+ *     the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *     for the wrong mm, and in the worst case we perform a superfluous
+ *     tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *     was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ *     Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ *     Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *     flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *     Atomically set the bit [other cpus will start sending flush ipis],
+ *     and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * The irq, dev_id and regs arguments are not used.  The request is read
+ * from flush_mm/flush_va, and this CPU's bit in flush_cpumask is
+ * cleared once it has been handled.  Always returns IRQ_HANDLED.
+ */
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
+{
+       unsigned long cpu;
+
+       cpu = get_cpu();
+
+       /* not addressed to this CPU: nothing to do */
+       if (!cpu_isset(cpu, flush_cpumask))
+               goto out;
+               /* 
+                * This was a BUG() but until someone can quote me the
+                * line from the intel manual that guarantees an IPI to
+                * multiple CPUs is retried _only_ on the erroring CPUs
+                * it's staying as a return
+                *
+                * BUG();
+                */
+                
+       if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+               if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+                       if (flush_va == FLUSH_ALL)
+                               local_flush_tlb();
+                       else
+                               __flush_tlb_one(flush_va);
+               } else
+                       leave_mm(cpu);
+       }
+       /* acknowledge: flush_tlb_others() spins until the mask is empty */
+       smp_mb__before_clear_bit();
+       cpu_clear(cpu, flush_cpumask);
+       smp_mb__after_clear_bit();
+out:
+       put_cpu_no_resched();
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Ask the CPUs in 'cpumask' to flush 'va' (or everything, when va is
+ * FLUSH_ALL) for 'mm', and wait until all of them have done so.
+ *
+ * The request is published in flush_mm/flush_va/flush_cpumask under
+ * tlbstate_lock, an INVALIDATE_TLB_VECTOR IPI is sent to the mask, and
+ * the caller then spins until every target has cleared its bit in
+ * flush_cpumask.
+ *
+ * It is a BUG() for cpumask to be empty, to contain a CPU that is not
+ * in cpu_online_map, to contain the calling CPU, or for mm to be NULL.
+ */
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+                                               unsigned long va)
+{
+       cpumask_t tmp;
+       /*
+        * A couple of (to be removed) sanity checks:
+        *
+        * - we do not send IPIs to not-yet booted CPUs.
+        * - current CPU must not be in mask
+        * - mask must exist :)
+        */
+       BUG_ON(cpus_empty(cpumask));
+
+       cpus_and(tmp, cpumask, cpu_online_map);
+       BUG_ON(!cpus_equal(cpumask, tmp));
+       BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+       BUG_ON(!mm);
+
+       /*
+        * i'm not happy about this global shared spinlock in the
+        * MM hot path, but we'll see how contended it is.
+        * Temporarily this turns IRQs off, so that lockups are
+        * detected by the NMI watchdog.
+        *
+        * NOTE(review): the lock is taken with plain spin_lock(), which
+        * does not itself disable interrupts; the sentence above looks
+        * stale -- confirm.
+        */
+       spin_lock(&tlbstate_lock);
+       
+       flush_mm = mm;
+       flush_va = va;
+       /* OR the target CPUs into flush_cpumask, one long at a time */
+#if NR_CPUS <= BITS_PER_LONG
+       atomic_set_mask(cpumask, &flush_cpumask);
+#else
+       {
+               int k;
+               unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+               unsigned long *cpu_mask = (unsigned long *)&cpumask;
+               for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+                       atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+       }
+#endif
+       /*
+        * We have to send the IPI only to
+        * CPUs affected.
+        */
+       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+       /* wait for every target to clear its bit in flush_cpumask */
+       while (!cpus_empty(flush_cpumask))
+               /* nothing. lockup detection does not belong here */
+               mb();
+
+       flush_mm = NULL;
+       flush_va = 0;
+       spin_unlock(&tlbstate_lock);
+}
+       
+/*
+ * Flush the whole TLB for current->mm: locally on this CPU, and through
+ * flush_tlb_others() on every other CPU set in the mm's cpu_vm_mask.
+ * Preemption is disabled for the duration.
+ */
+void flush_tlb_current_task(void)
+{
+       struct mm_struct *mm = current->mm;
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       /* work on a copy of the mask with this CPU removed */
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       local_flush_tlb();
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+       preempt_enable();
+}
+
+/*
+ * Flush the whole TLB for 'mm' on every CPU set in its cpu_vm_mask.
+ *
+ * On the calling CPU this is done only if mm is current->active_mm:
+ * with a current->mm the local TLB is flushed, otherwise the CPU drops
+ * the mm through leave_mm().  The other CPUs are reached through
+ * flush_tlb_others().  Preemption is disabled for the duration.
+ */
+void flush_tlb_mm (struct mm_struct * mm)
+{
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       /* work on a copy of the mask with this CPU removed */
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       if (current->active_mm == mm) {
+               if (current->mm)
+                       local_flush_tlb();
+               else
+                       leave_mm(smp_processor_id());
+       }
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+       preempt_enable();
+}
+
+/*
+ * Flush the TLB entry for address 'va' of the mm that owns 'vma', on
+ * every CPU set in that mm's cpu_vm_mask.
+ *
+ * On the calling CPU this is done only if the mm is current->active_mm:
+ * with a current->mm the single entry is flushed, otherwise the CPU
+ * drops the mm through leave_mm().  The other CPUs are reached through
+ * flush_tlb_others().  Preemption is disabled for the duration.
+ */
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       /* work on a copy of the mask with this CPU removed */
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       if (current->active_mm == mm) {
+               if(current->mm)
+                       __flush_tlb_one(va);
+               else
+                       leave_mm(smp_processor_id());
+       }
+
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, va);
+
+       preempt_enable();
+}
+
+/*
+ * Per-CPU worker for flush_tlb_all(): flush the whole local TLB and, if
+ * this CPU's cpu_tlbstate is TLBSTATE_LAZY, call leave_mm() for it.
+ * <info> is unused.
+ */
+static void do_flush_tlb_all(void* info)
+{
+       unsigned long this_cpu = smp_processor_id();
+
+       __flush_tlb_all();
+
+       if (per_cpu(cpu_tlbstate, this_cpu).state != TLBSTATE_LAZY)
+               return;
+       leave_mm(this_cpu);
+}
+
+/*
+ * Run do_flush_tlb_all() on every CPU via on_each_cpu().
+ */
+void flush_tlb_all(void)
+{
+       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+       /* Single-CPU mask naming only the destination. */
+       cpumask_t target = cpumask_of_cpu(cpu);
+
+       send_IPI_mask(target, RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+/* Held by smp_call_function() from publishing call_data until it stops waiting. */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+/* One cross-CPU call request; lives on the initiator's stack. */
+struct call_data_struct {
+       void (*func) (void *info);      /* function each remote CPU runs */
+       void *info;                     /* argument handed to func */
+       atomic_t started;               /* bumped by each CPU once it has copied func/info/wait */
+       atomic_t finished;              /* bumped by each CPU after func returns (only if wait) */
+       int wait;                       /* non-zero: initiator also waits on 'finished' */
+};
+
+/* Request currently in flight; read by smp_call_function_interrupt(). */
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+                       int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> Currently unused.
+ * <wait> If true, wait (atomically) until the function has completed on
+ *        the other CPUs.
+ * [RETURNS] Always 0 in this implementation. Does not return until every
+ * other online CPU has at least picked up <func> and <info>; with <wait>
+ * set, not until every one of them has finished executing <func>.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+{
+       struct call_data_struct data;
+       /* Number of CPUs expected to respond: everyone online except us. */
+       int cpus = num_online_cpus()-1;
+
+       if (!cpus)
+               return 0;
+
+       /* Can deadlock when called with interrupts disabled */
+       WARN_ON(irqs_disabled());
+
+       data.func = func;
+       data.info = info;
+       atomic_set(&data.started, 0);
+       data.wait = wait;
+       if (wait)
+               atomic_set(&data.finished, 0);
+
+       spin_lock(&call_lock);
+       /* Publish the on-stack request; mb() orders it before the IPI. */
+       call_data = &data;
+       mb();
+       
+       /* Send a message to all other CPUs and wait for them to respond */
+       send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+       /* Wait for response */
+       while (atomic_read(&data.started) != cpus)
+               barrier();
+
+       /* 'data' is on our stack, so with wait set we must outlive the calls. */
+       if (wait)
+               while (atomic_read(&data.finished) != cpus)
+                       barrier();
+       spin_unlock(&call_lock);
+
+       return 0;
+}
+
+/*
+ * Take the calling CPU out of service: clear it from cpu_online_map,
+ * disable local interrupts and loop forever. Never returns. <dummy> is
+ * unused.
+ */
+static void stop_this_cpu (void * dummy)
+{
+       /*
+        * Remove this CPU:
+        */
+       cpu_clear(smp_processor_id(), cpu_online_map);
+       local_irq_disable();
+       /* disable_local_APIC() is compiled out; only a message is printed. */
+#if 1
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
+       disable_local_APIC();
+#endif
+       /* Halt repeatedly if hlt is known to work, otherwise busy-spin. */
+       if (cpu_data[smp_processor_id()].hlt_works_ok)
+               for(;;) __asm__("hlt");
+       for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+       /* wait == 0: stop_this_cpu() never returns, so do not wait for it. */
+       smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+       local_irq_disable();
+       /* disable_local_APIC() is compiled out; only a message is printed. */
+#if 1
+       xxprint("smp_send_stop disable_local_APIC\n");
+#else
+       disable_local_APIC();
+#endif
+       local_irq_enable();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
+{
+       /* Intentionally empty: none of the arguments are used. */
+       return IRQ_HANDLED;
+}
+
+#include <linux/kallsyms.h>
+/*
+ * Handler for the call-function IPI sent by smp_call_function(). Copies
+ * the request out of call_data, acknowledges it through 'started', runs
+ * the function, and reports completion through 'finished' when the
+ * initiator asked to wait. Always returns IRQ_HANDLED.
+ */
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+                                       struct pt_regs *regs)
+{
+       /* Take private copies before acknowledging; see the note below. */
+       void (*func) (void *info) = call_data->func;
+       void *info = call_data->info;
+       int wait = call_data->wait;
+
+       /*
+        * Notify initiating CPU that I've grabbed the data and am
+        * about to execute the function
+        */
+       mb();
+       atomic_inc(&call_data->started);
+       /*
+        * At this point the info structure may be out of scope unless wait==1
+        */
+       irq_enter();
+       (*func)(info);
+       irq_exit();
+
+       /* With wait set the initiator is still spinning, so call_data is valid. */
+       if (wait) {
+               mb();
+               atomic_inc(&call_data->finished);
+       }
+
+       return IRQ_HANDLED;
+}
+
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
new file mode 100644 (file)
index 0000000..a9bb0e2
--- /dev/null
@@ -0,0 +1,1364 @@
+/*
+ *     x86 SMP booting functions
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Much of the core SMP work is based on previous work by Thomas Radke, to
+ *     whom a great many thanks are extended.
+ *
+ *     Thanks to Intel for making available several different Pentium,
+ *     Pentium Pro and Pentium-II/Xeon MP machines.
+ *     Original development of Linux SMP code supported by Caldera.
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ *
+ *     Fixes
+ *             Felix Koop      :       NR_CPUS used properly
+ *             Jose Renau      :       Handle single CPU case.
+ *             Alan Cox        :       By repeated request 8) - Total BogoMIPS report.
+ *             Greg Wright     :       Fix for kernel stacks panic.
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *     Matthias Sattler        :       Changes for 2.1 kernel map.
+ *     Michel Lespinasse       :       Changes for 2.1 kernel map.
+ *     Michael Chastain        :       Change trampoline.S to gnu as.
+ *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
+ *             Ingo Molnar     :       Added APIC timers, based on code
+ *                                     from Jose Renau
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
+ *             Martin J. Bligh :       Added support for multi-quad systems
+ *             Dave Jones      :       Report invalid combinations of Athlon CPUs.
+ *             Rusty Russell   :       Hacked into shape for new "hotplug" boot process.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+
+#if 1
+#define Dprintk(args...)
+#else
+#include <mach_apic.h>
+#endif
+#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
+
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
+
+/* Bit set by a secondary CPU at the end of smp_callin(); do_boot_cpu() polls it. */
+static cpumask_t cpu_callin_map;
+/* Bit set by do_boot_cpu() once the vcpu was started; smp_callin() polls it. */
+cpumask_t cpu_callout_map;
+/* start_secondary() spins until its own bit appears here (set outside this excerpt). */
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Indexed by logical CPU number; every entry starts out as 0xff. */
+u8 x86_cpu_to_apicid[NR_CPUS] =
+                       { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/* Compiled out in this port: the real-mode trampoline is kept for reference only. */
+#if 0
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end  [];
+static unsigned char *trampoline_base;
+static int trampoline_exec;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ *
+ * Returns the physical address of the trampoline page.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+       return virt_to_phys(trampoline_base);
+}
+#endif
+
+/*
+ * We are called very early to reserve low boot memory for SMP bring-up.
+ *
+ * In the active (#if 1) branch one low page is allocated per possible
+ * secondary CPU and its address stored in cpu_gdt_descr[cpu].address;
+ * do_boot_cpu() later copies the boot CPU's GDT into that page.
+ * The #else branch, which allocates the real-mode SMP bootup trampoline
+ * page, is compiled out.
+ */
+void __init smp_alloc_memory(void)
+{
+#if 1
+       int cpu;
+
+       /* CPU 0 is skipped: only cpu_gdt_descr[1..NR_CPUS-1] get a page here. */
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+#else
+       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       if (__pa(trampoline_base) >= 0x9F000)
+               BUG();
+       /*
+        * Make the SMP trampoline executable:
+        */
+       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+#endif
+}
+
+/*
+ * Record the parameters of CPU <id> in cpu_data[]. The entry starts as a
+ * copy of boot_cpu_data (which the bootstrap kernel entry code has set
+ * up); every CPU other than CPU 0 is then re-identified. Also notes
+ * B-step Pentiums and taints the kernel for AMD family 6 parts that are
+ * not certified as MP capable.
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+       struct cpuinfo_x86 *info = &cpu_data[id];
+       int mp_ok = 0;
+
+       *info = boot_cpu_data;
+       if (id != 0)
+               identify_cpu(info);
+
+       /*
+        * Mask B, Pentium, but not Pentium MMX:
+        * remember we have B step Pentia with bugs.
+        */
+       if (info->x86_vendor == X86_VENDOR_INTEL && info->x86 == 5 &&
+           info->x86_mask >= 1 && info->x86_mask <= 4 &&
+           info->x86_model <= 3)
+               smp_b_stepping = 1;
+
+       /*
+        * Certain Athlons might work (for various values of 'work') in SMP
+        * but they are not certified as MP capable. Nothing else needs
+        * checking.
+        */
+       if (info->x86_vendor != X86_VENDOR_AMD || info->x86 != 6)
+               return;
+
+       if (info->x86_model == 6 &&
+           (info->x86_mask == 0 || info->x86_mask == 1)) {
+               /* Athlon 660/661 is valid. */
+               mp_ok = 1;
+       } else if (info->x86_model == 7 && info->x86_mask == 0) {
+               /* Duron 670 is valid */
+               mp_ok = 1;
+       } else if ((info->x86_model == 6 && info->x86_mask >= 2) ||
+                  (info->x86_model == 7 && info->x86_mask >= 1) ||
+                  info->x86_model > 7) {
+               /*
+                * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+                * It's worth noting that the A5 stepping (662) of some Athlon XP's
+                * have the MP bit set.
+                * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+                */
+               if (cpu_has_mp)
+                       mp_ok = 1;
+       }
+
+       /* If we get here unflagged, it's not a certified SMP capable AMD system. */
+       if (!mp_ok)
+               tainted |= TAINT_UNSAFE_SMP;
+}
+
+#if 0
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+/* Set to 1 by synchronize_tsc_bp(); synchronize_tsc_ap() spins until it is non-zero. */
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+/* Rendezvous counter for entering one synchronization pass. */
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+/* Rendezvous counter for leaving one synchronization pass. */
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+/* TSC value sampled by each CPU during the most recent pass. */
+static unsigned long long tsc_values[NR_CPUS];
+
+/* Number of synchronization passes; the TSC is zeroed on the last one. */
+#define NR_LOOPS 5
+
+/*
+ * Boot-processor half of the TSC synchronization (compiled out here).
+ * Runs NR_LOOPS rendezvous passes with the APs, zeroing the TSC on the
+ * last pass, then prints each CPU whose sampled TSC differed from the
+ * average by more than 2 microseconds.
+ */
+static void __init synchronize_tsc_bp (void)
+{
+       int i;
+       unsigned long long t0;
+       unsigned long long sum, avg;
+       long long delta;
+       unsigned long one_usec;
+       int buggy = 0;
+
+       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+       /* convert from kcyc/sec to cyc/usec */
+       one_usec = cpu_khz / 1000;
+
+       atomic_set(&tsc_start_flag, 1);
+       wmb();
+
+       /*
+        * We loop a few times to get a primed instruction cache,
+        * then the last pass is more or less synchronized and
+        * the BP and APs set their cycle counters to zero all at
+        * once. This reduces the chance of having random offsets
+        * between the processors, and guarantees that the maximum
+        * delay between the cycle counters is never bigger than
+        * the latency of information-passing (cachelines) between
+        * two CPUs.
+        */
+       for (i = 0; i < NR_LOOPS; i++) {
+               /*
+                * all APs synchronize but they loop on '== num_cpus'
+                */
+               while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_stop, 0);
+               wmb();
+               /*
+                * this lets the APs save their current TSC:
+                */
+               atomic_inc(&tsc_count_start);
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               /*
+                * We clear the TSC in the last loop:
+                */
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               /*
+                * Wait for all APs to leave the synchronization point:
+                */
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_start, 0);
+               wmb();
+               atomic_inc(&tsc_count_stop);
+       }
+
+       /* Average the samples of all CPUs present in cpu_callout_map. */
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (cpu_isset(i, cpu_callout_map)) {
+                       t0 = tsc_values[i];
+                       sum += t0;
+               }
+       }
+       avg = sum;
+       do_div(avg, num_booting_cpus());
+
+       /* NOTE(review): 'sum' is accumulated below but never read afterwards. */
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_isset(i, cpu_callout_map))
+                       continue;
+               delta = tsc_values[i] - avg;
+               if (delta < 0)
+                       delta = -delta;
+               /*
+                * We report bigger than 2 microseconds clock differences.
+                */
+               if (delta > 2*one_usec) {
+                       long realdelta;
+                       if (!buggy) {
+                               buggy = 1;
+                               printk("\n");
+                       }
+                       realdelta = delta;
+                       do_div(realdelta, one_usec);
+                       if (tsc_values[i] < avg)
+                               realdelta = -realdelta;
+
+                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+               }
+
+               sum += delta;
+       }
+       if (!buggy)
+               printk("passed.\n");
+}
+
+/*
+ * Application-processor half of the TSC synchronization (compiled out
+ * here). Waits for tsc_start_flag, then runs NR_LOOPS rendezvous passes
+ * on tsc_count_start/tsc_count_stop, sampling its TSC each pass and
+ * zeroing it on the last one.
+ */
+static void __init synchronize_tsc_ap (void)
+{
+       int i;
+
+       /*
+        * Not every cpu is online at the time
+        * this gets called, so we first wait for the BP to
+        * finish SMP initialization:
+        */
+       while (!atomic_read(&tsc_start_flag)) mb();
+
+       for (i = 0; i < NR_LOOPS; i++) {
+               /* Check in, then wait until the count reaches num_booting_cpus(). */
+               atomic_inc(&tsc_count_start);
+               while (atomic_read(&tsc_count_start) != num_booting_cpus())
+                       mb();
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               /* Check out, then wait until the count reaches num_booting_cpus(). */
+               atomic_inc(&tsc_count_stop);
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+       }
+}
+#undef NR_LOOPS
+#endif
+
+extern void calibrate_delay(void);
+
+/*
+ * Reset to 0 by do_boot_cpu() before each boot attempt. The code that
+ * sets it to 1 and the code that waits on it are both compiled out below.
+ */
+static atomic_t init_deasserted;
+
+/*
+ * Called on a freshly started secondary CPU. Waits (up to 2 seconds) for
+ * the boot CPU to set this CPU's bit in cpu_callout_map, calibrates the
+ * delay loop, stores the CPU parameters and finally sets this CPU's bit
+ * in cpu_callin_map so the boot CPU can continue.
+ *
+ * Calls BUG() if the CPU is already marked in cpu_callin_map or if no
+ * callout was seen.
+ */
+void __init smp_callin(void)
+{
+       int cpuid, phys_id;
+       unsigned long timeout;
+
+#if 0
+       /*
+        * If woken up by an INIT in an 82489DX configuration
+        * we may get here before an INIT-deassert IPI reaches
+        * our local APIC.  We have to wait for the IPI or we'll
+        * lock up on an APIC access.
+        */
+       wait_for_init_deassert(&init_deasserted);
+#endif
+
+       /*
+        * (This works even if the APIC is not enabled.)
+        */
+       phys_id = smp_processor_id();
+       cpuid = smp_processor_id();
+       if (cpu_isset(cpuid, cpu_callin_map)) {
+               printk("huh, phys CPU#%d, CPU#%d already present??\n",
+                                       phys_id, cpuid);
+               BUG();
+       }
+       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+       /*
+        * STARTUP IPIs are fragile beasts as they might sometimes
+        * trigger some glue motherboard logic. Complete APIC bus
+        * silence for 1 second, this overestimates the time the
+        * boot CPU is spending to send the up to 2 STARTUP IPIs
+        * by a factor of two. This should be enough.
+        */
+
+       /*
+        * Waiting 2s total for startup (udelay is not yet working)
+        */
+       timeout = jiffies + 2*HZ;
+       while (time_before(jiffies, timeout)) {
+               /*
+                * Has the boot CPU finished its STARTUP sequence?
+                */
+               if (cpu_isset(cpuid, cpu_callout_map))
+                       break;
+               rep_nop();
+       }
+
+       /*
+        * Decide on the callout bit itself rather than re-reading jiffies:
+        * a callout that arrives just before the deadline must not be
+        * reported as a failure merely because the clock ticked over
+        * between leaving the loop and this check.
+        */
+       if (!cpu_isset(cpuid, cpu_callout_map)) {
+               printk("BUG: CPU%d started up but did not get a callout!\n",
+                       cpuid);
+               BUG();
+       }
+
+#if 0
+       /*
+        * the boot CPU has finished the init stage and is spinning
+        * on callin_map until we finish. We are free to set up this
+        * CPU, first the APIC. (this is probably redundant on most
+        * boards)
+        */
+
+       Dprintk("CALLIN, before setup_local_APIC().\n");
+       smp_callin_clear_local_apic();
+       setup_local_APIC();
+#endif
+       map_cpu_to_logical_apicid();
+
+       /* Interrupts are enabled only around the calibration below. */
+       local_irq_enable();
+
+       /*
+        * Get our bogomips.
+        */
+       calibrate_delay();
+       Dprintk("Stack at about %p\n",&cpuid);
+
+       /*
+        * Save our processor parameters
+        */
+       smp_store_cpu_info(cpuid);
+
+#if 0
+       disable_APIC_timer();
+#endif
+       local_irq_disable();
+       /*
+        * Allow the master to continue.
+        */
+       cpu_set(cpuid, cpu_callin_map);
+
+#if 0
+       /*
+        *      Synchronize the TSC with the BP
+        */
+       if (cpu_has_tsc && cpu_khz)
+               synchronize_tsc_ap();
+#endif
+}
+
+/* Incremented by do_boot_cpu(), which uses the new value as the CPU number to boot. */
+int cpucount;
+
+extern int cpu_idle(void);
+
+
+/*
+ * Handler installed by local_setup_debug(); does nothing beyond
+ * reporting the interrupt as handled.
+ */
+static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
+                                        struct pt_regs *regs)
+{
+       /* No work to do; the arguments are unused. */
+       return IRQ_HANDLED;
+}
+
+/* irqaction registered by local_setup_debug(); named "ldebug", handled by local_debug_interrupt(). */
+static struct irqaction local_irq_debug = {
+       local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
+       NULL, NULL
+};
+
+/*
+ * Bind VIRQ_DEBUG to an irq and attach local_irq_debug to it.
+ * The result of setup_irq() is deliberately discarded.
+ */
+void local_setup_debug(void)
+{
+       (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
+}
+
+
+extern void local_setup_timer(void);
+
+/*
+ * Activate a secondary processor.
+ *
+ * Initialises the CPU, checks in with the boot CPU via smp_callin(),
+ * waits for its bit in smp_commenced_mask, sets up the local timer,
+ * debug and SMP interrupts, marks itself online and finally enters
+ * cpu_idle(), whose result is returned. <unused> is ignored.
+ */
+int __init start_secondary(void *unused)
+{
+       /*
+        * Don't put anything before smp_callin(), SMP
+        * booting is too fragile that we want to limit the
+        * things done here to the most necessary things.
+        */
+       cpu_init();
+       smp_callin();
+       /* Spin until this CPU's bit shows up in smp_commenced_mask. */
+       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+               rep_nop();
+       local_setup_timer();
+       local_setup_debug();    /* XXX */
+       smp_intr_init();
+       local_irq_enable();
+       /*
+        * low-memory mappings have been cleared, flush them from
+        * the local TLBs too.
+        */
+       local_flush_tlb();
+       cpu_set(smp_processor_id(), cpu_online_map);
+       wmb();
+       return cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure.
+ * This function must not return: it switches to the stack pointer saved
+ * in current->thread.esp and jumps to current->thread.eip.
+ */
+void __init initialize_secondary(void)
+{
+       /*
+        * We don't actually need to load the full TSS,
+        * basically just the stack pointer and the eip.
+        */
+
+       asm volatile(
+               "movl %0,%%esp\n\t"
+               "jmp *%1"
+               :
+               :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+/* Defined elsewhere; do_boot_cpu() stores the new idle task's saved esp in .esp. */
+extern struct {
+       void * esp;
+       unsigned short ss;
+} stack_start;
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
+
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+       /* Announce the binding, then record it in both lookup tables. */
+       printk("Mapping cpu %d to node %d\n", cpu, node);
+       cpu_2_node[cpu] = node;
+       cpu_set(cpu, node_2_cpu_mask[node]);
+}
+
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
+{
+       int nid = 0;
+
+       printk("Unmapping cpu %d from all nodes\n", cpu);
+
+       /* Drop the CPU from every node's mask, whichever node held it. */
+       while (nid < MAX_NUMNODES) {
+               cpu_clear(cpu, node_2_cpu_mask[nid]);
+               nid++;
+       }
+
+       /* The reverse table falls back to node 0. */
+       cpu_2_node[cpu] = 0;
+}
+#else /* !CONFIG_NUMA */
+
+/* Without NUMA there is nothing to record: both helpers expand to empty statement expressions. */
+#define map_cpu_to_node(cpu, node)     ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+/* Indexed by CPU number; BAD_APICID until map_cpu_to_logical_apicid() runs on that CPU. */
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Record the calling CPU in cpu_2_logical_apicid[] and bind it to a node.
+ * In this port the processor id is used as the logical APIC id.
+ */
+void map_cpu_to_logical_apicid(void)
+{
+       int this_cpu = smp_processor_id();
+       int logical_id = smp_processor_id();
+
+       cpu_2_logical_apicid[this_cpu] = logical_id;
+       map_cpu_to_node(this_cpu, apicid_to_node(logical_id));
+}
+
+/*
+ * Undo map_cpu_to_logical_apicid() for <cpu>: drop its node binding and
+ * reset its table entry to BAD_APICID.
+ */
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+       unmap_cpu_to_node(cpu);
+       cpu_2_logical_apicid[cpu] = BAD_APICID;
+}
+
+#if APIC_DEBUG
+/*
+ * Debug helper: issue a remote-read request for the ID, VERSION and SPIV
+ * registers of APIC <apicid> and print each value, or "failed" when the
+ * read does not complete as valid.
+ */
+static inline void __inquire_remote_apic(int apicid)
+{
+       int reg_sel[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+       char *reg_name[] = { "ID", "VERSION", "SPIV" };
+       int idx, polls, rr_state;
+
+       printk("Inquiring remote APIC #%d...\n", apicid);
+
+       for (idx = 0; idx < sizeof(reg_sel) / sizeof(reg_sel[0]); idx++) {
+               printk("... APIC #%d %s: ", apicid, reg_name[idx]);
+
+               /* Wait for idle, then address the target and request the read. */
+               apic_wait_icr_idle();
+
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+               apic_write_around(APIC_ICR, APIC_DM_REMRD | reg_sel[idx]);
+
+               /* Poll in 100us steps while the remote read is in progress. */
+               polls = 0;
+               do {
+                       udelay(100);
+                       rr_state = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+               } while (rr_state == APIC_ICR_RR_INPROG && polls++ < 1000);
+
+               if (rr_state == APIC_ICR_RR_VALID) {
+                       int value = apic_read(APIC_RRR);
+
+                       printk("%08x\n", value);
+               } else {
+                       printk("failed\n");
+               }
+       }
+}
+#endif
+
+#if 0
+#ifdef WAKE_SECONDARY_VIA_NMI
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ *
+ * Compiled out in this port (enclosing #if 0). <start_eip> is not used by
+ * this variant. Returns send_status | accept_status, i.e. 0 on success.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+       unsigned long send_status = 0, accept_status = 0;
+       int timeout, maxlvt;
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+       /* Boot on the stack */
+       /* Kick the second */
+       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+       /* Poll the ICR busy bit in 100us steps, at most about 1000 times. */
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       /*
+        * Give the other CPU some time to accept the IPI.
+        */
+       udelay(200);
+       /*
+        * Due to the Pentium erratum 3AP.
+        */
+       maxlvt = get_maxlvt();
+       if (maxlvt > 3) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+       }
+       accept_status = (apic_read(APIC_ESR) & 0xEF);
+       Dprintk("NMI sent.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
+
+#ifdef WAKE_SECONDARY_VIA_INIT
+/*
+ * Wake a secondary CPU with the INIT assert / INIT deassert / STARTUP IPI
+ * sequence. Compiled out in this port (enclosing #if 0).
+ *
+ * <phys_apicid> APIC id of the target.
+ * <start_eip>   start address; bits 12 and up are placed in the STARTUP IPI.
+ * Returns send_status | accept_status, i.e. 0 on success.
+ */
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+       unsigned long send_status = 0, accept_status = 0;
+       int maxlvt, timeout, num_starts, j;
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       Dprintk("Asserting INIT.\n");
+
+       /*
+        * Turn INIT on target chip
+        */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /*
+        * Send IPI
+        */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+                               | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       mdelay(10);
+
+       Dprintk("Deasserting INIT.\n");
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /* Send IPI */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       /* Record that the INIT deassert has been sent. */
+       atomic_set(&init_deasserted, 1);
+
+       /*
+        * Should we send STARTUP IPIs ?
+        *
+        * Determine this based on the APIC version.
+        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid]))
+               num_starts = 2;
+       else
+               num_starts = 0;
+
+       /*
+        * Run STARTUP IPI loop.
+        */
+       Dprintk("#startup loops: %d.\n", num_starts);
+
+       maxlvt = get_maxlvt();
+
+       for (j = 1; j <= num_starts; j++) {
+               Dprintk("Sending STARTUP #%d.\n",j);
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+               Dprintk("After apic_write.\n");
+
+               /*
+                * STARTUP IPI
+                */
+
+               /* Target chip */
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+               /* Boot on the stack */
+               /* Kick the second */
+               apic_write_around(APIC_ICR, APIC_DM_STARTUP
+                                       | (start_eip >> 12));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(300);
+
+               Dprintk("Startup point 1.\n");
+
+               Dprintk("Waiting for send to finish...\n");
+               timeout = 0;
+               do {
+                       Dprintk("+");
+                       udelay(100);
+                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+               } while (send_status && (timeout++ < 1000));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(200);
+               /*
+                * Due to the Pentium erratum 3AP.
+                */
+               if (maxlvt > 3) {
+                       apic_read_around(APIC_SPIV);
+                       apic_write(APIC_ESR, 0);
+               }
+               accept_status = (apic_read(APIC_ESR) & 0xEF);
+               /* Stop retrying as soon as either error indication is set. */
+               if (send_status || accept_status)
+                       break;
+       }
+       Dprintk("After Startup.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_INIT */
+#endif
+
+extern cpumask_t cpu_initialized;
+
+static int __init do_boot_cpu(int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ */
+{
+       struct task_struct *idle;
+       unsigned long boot_error;
+       int timeout, cpu;
+       unsigned long start_eip;
+#if 0
+       unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+       full_execution_context_t ctxt;
+       extern void startup_32_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern int smp_trap_init(trap_info_t *);
+       int i;
+
+       cpu = ++cpucount;
+       /*
+        * We can't use kernel_thread since we must avoid to
+        * reschedule the child.
+        */
+       idle = fork_idle(cpu);
+       if (IS_ERR(idle))
+               panic("failed fork for CPU %d", cpu);
+       idle->thread.eip = (unsigned long) start_secondary;
+       /* start_eip had better be page-aligned! */
+       start_eip = (unsigned long)startup_32_smp;
+
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       stack_start.esp = (void *) idle->thread.esp;
+
+       irq_ctx_init(cpu);
+
+       /*
+        * This grunge runs the startup process for
+        * the targeted processor.
+        */
+
+       atomic_set(&init_deasserted, 0);
+
+#if 1
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+              (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+               memset((char *)cpu_gdt_descr[cpu].address +
+                      FIRST_RESERVED_GDT_ENTRY * 8, 0,
+                      NR_RESERVED_GDT_ENTRIES * 8);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.cpu_ctxt.ds = __USER_DS;
+       ctxt.cpu_ctxt.es = __USER_DS;
+       ctxt.cpu_ctxt.fs = 0;
+       ctxt.cpu_ctxt.gs = 0;
+       ctxt.cpu_ctxt.ss = __KERNEL_DS;
+       ctxt.cpu_ctxt.cs = __KERNEL_CS;
+       ctxt.cpu_ctxt.eip = start_eip;
+       ctxt.cpu_ctxt.esp = idle->thread.esp;
+       ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+       /* FPU is set up to default initial state. */
+       memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
+       }
+       ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+               flush_page_update_queue();
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.guestos_ss  = __KERNEL_DS;
+       ctxt.guestos_esp = idle->thread.esp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_cs     = __KERNEL_CS;
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_cs  = __KERNEL_CS;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+       ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+#else
+       Dprintk("Setting warm reset code and vector.\n");
+
+       store_NMI_vector(&nmi_high, &nmi_low);
+
+       smpboot_setup_warm_reset_vector(start_eip);
+
+       /*
+        * Starting actual IPI sequence...
+        */
+       boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+                       if (*((volatile unsigned char *)trampoline_base)
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+                       else
+                               /* trampoline code not run */
+                               printk("Not responding.\n");
+                       inquire_remote_apic(apicid);
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+       /* mark "stuck" area as not stuck */
+       *((volatile unsigned long *)trampoline_base) = 0;
+#endif
+
+       return boot_error;
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+       unsigned long cachesize;       /* kB   */
+       unsigned long bandwidth = 350; /* MB/s */
+       /*
+        * Rough estimation for SMP scheduling: cacheflush_time is the
+        * number of cycles it takes for a fully memory-limited process
+        * to flush the SMP-local cache, and cache_decay_ticks is then
+        * derived from it.
+        * (For a P5 this pretty much means we will choose another idle
+        *  CPU almost always at wakeup time (this is due to the small
+        *  L1 cache), on PIIs it's around 50-100 usecs, depending on
+        *  the cache size)
+        */
+
+       if (!cpu_khz) {
+               /*
+                * this basically disables processor-affinity scheduling
+                * on SMP without a TSC; cache_decay_ticks is left untouched.
+                */
+               cacheflush_time = 0;
+               return;
+       } else {
+               cachesize = boot_cpu_data.x86_cache_size;
+               if (cachesize == -1) { /* -1 is converted to ULONG_MAX for this compare */
+                       cachesize = 16; /* Pentiums, 2x8kB cache */
+                       bandwidth = 100;
+               }
+               /* (cpu_khz>>10 ~ MHz) * (cache size in bytes) / (MB/s) ~ cycles */
+               cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+       }
+       /* cycles / kHz = msecs, plus one; printed as msecs below */
+       cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+               (long)cacheflush_time/(cpu_khz/1000),
+               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+       printk("task migration cache decay timeout: %ld msecs.\n",
+               cache_decay_ticks);
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+#if 0
+static int boot_cpu_logical_apicid;
+#endif
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+       int cpu, kicked;
+       unsigned long bogosum = 0;
+#if 0
+       int apicid, bit;
+#endif
+       /* Record boot-CPU state, then start the other VCPUs via do_boot_cpu(). */
+       /*
+        * Setup boot CPU information
+        */
+       smp_store_cpu_info(0); /* Final full version of the data */
+       printk("CPU%d: ", 0);
+       print_cpu_info(&cpu_data[0]);
+
+#if 0
+       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+       boot_cpu_logical_apicid = logical_smp_processor_id();
+       x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+       // boot_cpu_physical_apicid = 0;
+       // boot_cpu_logical_apicid = 0;
+       x86_cpu_to_apicid[0] = 0;
+#endif
+
+       current_thread_info()->cpu = 0;
+       smp_tune_scheduling();
+       cpus_clear(cpu_sibling_map[0]);
+       cpu_set(0, cpu_sibling_map[0]);
+
+       /*
+        * If we couldn't find an SMP configuration at boot time,
+        * get out of here now!
+        */
+       if (!smp_found_config /* && !acpi_lapic */) {
+               printk(KERN_NOTICE "SMP motherboard not detected.\n");
+               smpboot_clear_io_apic_irqs();
+#if 0
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               if (APIC_init_uniprocessor())
+                       printk(KERN_NOTICE "Local APIC not detected."
+                                          " Using dummy APIC emulation.\n");
+#endif
+               map_cpu_to_logical_apicid();
+               return;
+       }
+
+#if 0
+       /*
+        * Should not be necessary because the MP table should list the boot
+        * CPU too, but we do it for the sake of robustness anyway.
+        * Makes no sense to do this check in clustered apic mode, so skip it
+        */
+       if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+                               boot_cpu_physical_apicid);
+               physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+       }
+
+       /*
+        * If we couldn't find a local APIC, then get out of here now!
+        */
+       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+                       boot_cpu_physical_apicid);
+               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+               smpboot_clear_io_apic_irqs();
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               return;
+       }
+
+       verify_local_APIC();
+#endif
+
+       /*
+        * If SMP should be disabled (max_cpus == 0), force n_vcpu to 1 and stop.
+        */
+       if (!max_cpus) {
+               HYPERVISOR_shared_info->n_vcpu = 1;
+               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+               smpboot_clear_io_apic_irqs();
+#if 0
+               phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+               return;
+       }
+
+       smp_intr_init();
+
+#if 0
+       connect_bsp_APIC();
+       setup_local_APIC();
+#endif
+       map_cpu_to_logical_apicid();
+#if 0
+
+
+       setup_portio_remap();
+
+       /*
+        * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+        *
+        * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+        * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+        * clustered apic ID.
+        */
+       Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+       Dprintk("CPU present map: %lx\n",
+               (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
+       /* Boot VCPUs 1..n_vcpu-1; once cpucount+1 reaches max_cpus the rest are skipped. */
+       kicked = 1;
+       for (cpu = 1; kicked < NR_CPUS &&
+                    cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
+               if (max_cpus <= cpucount+1)
+                       continue;
+
+               if (do_boot_cpu(cpu))
+                       printk("CPU #%d not responding - cannot use it.\n",
+                                                               cpu);
+               else
+                       ++kicked;
+       }
+
+#if 0
+       /*
+        * Cleanup possible dangling ends...
+        */
+       smpboot_restore_warm_reset_vector();
+#endif
+
+       /*
+        * Allow the user to impress friends.
+        */
+       Dprintk("Before bogomips.\n");
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               if (cpu_isset(cpu, cpu_callout_map))
+                       bogosum += cpu_data[cpu].loops_per_jiffy;
+       printk(KERN_INFO
+               "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+               cpucount+1,
+               bogosum/(500000/HZ),
+               (bogosum/(5000/HZ))%100);
+
+       Dprintk("Before bogocount - setting activated=1.\n");
+
+       if (smp_b_stepping)
+               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+       /*
+        * Don't taint if we are running SMP kernel on a single non-MP
+        * approved Athlon
+        */
+       if (tainted & TAINT_UNSAFE_SMP) {
+               if (cpucount)
+                       printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+               else
+                       tainted &= ~TAINT_UNSAFE_SMP;
+       }
+
+       Dprintk("Boot done.\n");
+
+       /*
+        * construct cpu_sibling_map[], so that we can tell sibling CPUs
+        * efficiently.
+        */
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               cpus_clear(cpu_sibling_map[cpu]);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               int siblings = 0;
+               int i;
+               if (!cpu_isset(cpu, cpu_callout_map))
+                       continue;
+
+               if (smp_num_siblings > 1) {
+                       for (i = 0; i < NR_CPUS; i++) {
+                               if (!cpu_isset(i, cpu_callout_map))
+                                       continue;
+                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                                       siblings++;
+                                       cpu_set(i, cpu_sibling_map[cpu]);
+                               }
+                       }
+               } else {
+                       siblings++;
+                       cpu_set(cpu, cpu_sibling_map[cpu]);
+               }
+
+               if (siblings != smp_num_siblings)
+                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+       }
+
+#if 0
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               check_nmi_watchdog();
+
+       smpboot_setup_io_apic();
+
+       setup_boot_APIC_clock();
+
+       /*
+        * Synchronize the TSC with the AP
+        */
+       if (cpu_has_tsc && cpucount && cpu_khz)
+               synchronize_tsc_bp();
+#endif
+}
+
+/* These are wrappers to interface to the new boot process.  Someone
+   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+       smp_boot_cpus(max_cpus); /* does all the work; max_cpus == 0 keeps SMP off */
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+       cpu_set(smp_processor_id(), cpu_online_map);  /* mark the calling CPU online */
+       cpu_set(smp_processor_id(), cpu_callout_map); /* ... and set it in the callout map */
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+       int err = 0;
+
+       /* Boot-time only (see "rewrite" above); CPU must have called in. */
+       if (cpu_isset(cpu, smp_commenced_mask))
+               err = -ENOSYS;
+       else if (!cpu_isset(cpu, cpu_callin_map))
+               err = -EIO;
+
+       /* Interrupts are enabled on every path, error or not. */
+       local_irq_enable();
+       if (err)
+               return err;
+
+       /* Unleash the CPU and wait for it to come online. */
+       cpu_set(cpu, smp_commenced_mask);
+       while (!cpu_isset(cpu, cpu_online_map))
+               mb();
+       return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#if 1 /* nothing is done here; the code in the #else branch is compiled out */
+#else
+#ifdef CONFIG_X86_IO_APIC
+       setup_ioapic_dest();
+#endif
+       zap_low_mappings();
+       /*
+        * Disable executability of the SMP trampoline:
+        */
+       set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+/* irqaction that smp_intr_init() installs for the RESCHEDULE_VECTOR IPI. */
+static struct irqaction reschedule_irq = {
+       smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
+       NULL, NULL
+};
+
+extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
+/* irqaction that smp_intr_init() installs for the INVALIDATE_TLB_VECTOR IPI. */
+static struct irqaction invalidate_irq = {
+       smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
+       NULL, NULL
+};
+
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+/* irqaction that smp_intr_init() installs for the CALL_FUNCTION_VECTOR IPI. */
+static struct irqaction call_function_irq = {
+       smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
+       "call_function", NULL, NULL
+};
+
+void __init smp_intr_init(void)
+{
+       /* Bind each IPI vector on the calling CPU to an irq and install its irqaction. */
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
+           &reschedule_irq);
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
+           &invalidate_irq);
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
+           &call_function_irq);
+}
diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
new file mode 100644 (file)
index 0000000..51addc6
--- /dev/null
@@ -0,0 +1,19 @@
+/* Copyright (C) 2004, Christian Limpach */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/threads.h>
+
+unsigned int __initdata maxcpus = NR_CPUS; /* starts at the build-time maximum */
+
+
+/*
+ * Hook for changing the profiling timer frequency via /proc/profile.
+ * Stub: the multiplier is ignored; this only logs a message and returns 0.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+       printk("setup_profiling_timer\n");
+
+       return 0;
+}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
new file mode 100644 (file)
index 0000000..80b7ca0
--- /dev/null
@@ -0,0 +1,3 @@
+
+obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
+
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
new file mode 100644 (file)
index 0000000..5e7d47c
--- /dev/null
@@ -0,0 +1,86 @@
+/******************************************************************************
+ * blktap.c
+ * 
+ * XenLinux virtual block-device tap.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Based on the original split block driver:
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ * 
+ * Note that unlike the split block driver code, this driver has been developed
+ * strictly for Linux 2.6
+ */
+
+#include "blktap.h"
+
+int __init xlblk_init(void)
+{
+    ctrl_msg_t               cmsg;
+    blkif_fe_driver_status_t fe_st;
+    blkif_be_driver_status_t be_st;
+
+    printk(KERN_INFO "Initialising Xen block tap device\n");
+
+    DPRINTK("   tap - Backend connection init:\n");
+
+    /* Receive CMSG_BLKIF_FE control messages in blkif_ctrlif_rx(). */
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a frontend driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_BLKIF_FE;
+    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
+    cmsg.length    = sizeof(blkif_fe_driver_status_t);
+    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK("   tap - Frontend connection init:\n");
+
+    active_reqs_init();
+
+    ptfe_blkif.status = DISCONNECTED;
+    /* Receive CMSG_BLKIF_BE control messages in the same handler. */
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a backend driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_BLKIF_BE;
+    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
+    cmsg.length    = sizeof(blkif_be_driver_status_t);
+    be_st.status   = BLKIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &be_st, sizeof(be_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK("   tap - Userland channel init:\n");
+
+    blktap_init();
+
+    DPRINTK("Blkif tap device initialized.\n");
+    /* Always reports success: no call above has its result checked. */
+    return 0;
+}
+
+void blkdev_suspend(void)
+{   /* empty: nothing is done on suspend */
+}
+
+void blkdev_resume(void)
+{
+    blkif_fe_driver_status_t up_msg;
+    ctrl_msg_t               msg;
+
+    /* Tell the domain controller the frontend driver is UP, as xlblk_init() does. */
+    up_msg.status = BLKIF_DRIVER_STATUS_UP;
+    msg.type      = CMSG_BLKIF_FE;
+    msg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
+    msg.length    = sizeof(up_msg);
+    memcpy(msg.msg, &up_msg, sizeof(up_msg));
+    ctrl_if_send_message_block(&msg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+
+__initcall(xlblk_init);
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
new file mode 100644 (file)
index 0000000..7e5d73d
--- /dev/null
@@ -0,0 +1,254 @@
+/*
+ * blktap.h
+ * 
+ * Interfaces for the Xen block tap driver.
+ * 
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ * 
+ */
+
+#ifndef __BLKTAP_H__
+#define __BLKTAP_H__
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/io/blkif.h>
+
+/* -------[ debug / pretty printing ]--------------------------------- */
+
+#if 0 /* change to 1 to enable assertions and verbose DPRINTK output */
+#define ASSERT(_p) /* do/while(0): safe as a single statement in if/else */ \
+    do { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", \
+    #_p , __LINE__, __FILE__); *(int*)0=0; } } while (0)
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else /* default: both macros compile away to nothing */
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
+
+/* -------[ connection / request tracking ]--------------------------- */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+extern spinlock_t blkif_io_lock;
+
+typedef struct blkif_st {
+    /* Unique identifier for this interface. */
+    domid_t          domid;
+    unsigned int     handle;
+    /* Physical parameters of the comms window. */
+    unsigned long    shmem_frame;
+    unsigned int     evtchn;
+    int              irq;
+    /* Comms information. */
+    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+    BLKIF_RING_IDX     blk_req_cons;  /* Request consumer. */
+    BLKIF_RING_IDX     blk_resp_prod; /* Private version of resp. producer. */
+
+    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+    /* DISCONNECT response is deferred until pending requests are ack'ed.
+     * We therefore need to store the id from the original request.
+     */
+    u8               disconnect_rspid;
+    struct blkif_st *hash_next;
+    struct list_head blkdev_list;
+    spinlock_t       blk_ring_lock;
+    atomic_t         refcnt;
+
+    struct work_struct work;
+} blkif_t;
+
+typedef struct { /* request-tracking record; NOTE(review): users are not visible in this header */
+    blkif_t       *blkif;
+    unsigned long  id;
+    int            nr_pages; /* presumably how many array slots below are in use - confirm */
+    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* one slot per request segment */
+    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* one slot per request segment */
+    int            next_free;
+} active_req_t;
+
+
+/* -------[ block ring structs ]-------------------------------------- */
+
+/* Types of ring. */
+#define BLKIF_REQ_RING_TYPE 1
+#define BLKIF_RSP_RING_TYPE 2
+
+/* generic ring struct: 'type' is also the first member of both ring views below. */
+typedef struct blkif_generic_ring_struct {
+    int type;
+} blkif_generic_ring_t;
+
+/* A requestor's view of a ring. */
+typedef struct blkif_req_ring_struct {
+
+    int type;                    /* Will be BLKIF_REQ_RING_TYPE        */
+    BLKIF_RING_IDX req_prod;     /* PRIVATE req_prod index             */
+    BLKIF_RING_IDX rsp_cons;     /* Response consumer index            */
+    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
+
+} blkif_req_ring_t;
+
+#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
+
+/* A responder's view of a ring. */
+typedef struct blkif_rsp_ring_struct {
+
+    int type;                    /* Will be BLKIF_RSP_RING_TYPE        */
+    BLKIF_RING_IDX rsp_prod;     /* PRIVATE rsp_prod index             */
+    BLKIF_RING_IDX req_cons;     /* Request consumer index             */
+    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
+
+} blkif_rsp_ring_t;
+
+#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 } /* no leading '=', same form as BLKIF_REQ_RING_INIT */
+
+#define RING(a) ((blkif_generic_ring_t *)(a)) /* fully parenthesised so RING(p)->type parses as intended */
+
+inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
+
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=202        */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs:                                                      */
+#define BLKTAP_IOCTL_KICK_FE         1
+#define BLKTAP_IOCTL_KICK_BE         2
+#define BLKTAP_IOCTL_SETMODE         3
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
+#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
+#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
+#define BLKTAP_MODE_COPY_FE          0x00000004
+#define BLKTAP_MODE_COPY_BE          0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+    switch ( arg ) {   /* pass-through and intercept modes must match exactly */
+    case BLKTAP_MODE_PASSTHROUGH:  case BLKTAP_MODE_INTERCEPT_FE:
+    case BLKTAP_MODE_INTERCEPT_BE: case BLKTAP_MODE_INTERPOSE:
+        return 1;
+    }
+    /* Copy modes: the matching *_PAGES bit(s) may be set or clear. */
+    return ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES)   == BLKTAP_MODE_COPY_FE ||
+             (arg & ~BLKTAP_MODE_COPY_BE_PAGES)   == BLKTAP_MODE_COPY_BE ||
+             (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH );
+}
+
+
+
+/* -------[ Mappings to User VMA ]------------------------------------ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+extern struct vm_area_struct *blktap_vma;
+
+/* The following are from blkback.c and should probably be put in a
+ * header and included from there.
+ * The mmap area described here is where attached data pages will be mapped.
+ */
+extern unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES             \
+    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg)                        \
+    (mmap_vstart +                                   \
+     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+     ((_seg) * PAGE_SIZE))
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+
+#define RING_PAGES 128 
+extern unsigned long rings_vstart;
+
+/* -------[ Here be globals ]----------------------------------------- */
+
+extern unsigned long blktap_mode;
+
+
+/* blkif struct, containing ring to FE domain */
+extern blkif_t ptfe_blkif; 
+
+/* Connection to a single backend domain. */
+extern blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
+extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
+extern BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
+
+/* Rings up to user space. */ 
+extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
+extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
+
+/* Event channel to backend domain. */
+extern unsigned int blkif_ptbe_evtchn;
+
+/* User ring status... this will soon vanish into a ring struct. */
+extern unsigned long blktap_ring_ok;
+
+/* -------[ ...and function prototypes. ]----------------------------- */
+
+/* init function for character device interface.                       */
+int blktap_init(void);
+
+/* interfaces to the char driver, passing messages to and from apps.   */
+void blktap_kick_user(void);
+int blktap_write_to_ring(blkif_request_t *req);
+
+
+/* user ring access functions: */
+int blktap_write_fe_ring(blkif_request_t *req);
+int blktap_write_be_ring(blkif_response_t *rsp);
+int blktap_read_fe_ring(void);
+int blktap_read_be_ring(void);
+
+/* and the helpers they call: */
+inline int write_resp_to_fe_ring(blkif_response_t *rsp);
+inline void kick_fe_domain(void);
+
+inline int write_req_to_be_ring(blkif_request_t *req);
+inline void kick_be_domain(void);
+
+/* Interrupt handlers. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+                                  struct pt_regs *ptregs);
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Control message receiver. */
+extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+
+#endif /* __BLKTAP_H__ */
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
new file mode 100644 (file)
index 0000000..a3d485a
--- /dev/null
@@ -0,0 +1,358 @@
+/******************************************************************************
+ * blktap_controlmsg.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Control interfaces to the frontend and backend drivers.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+#include "blktap.h"
+
+/* Values held in blkif_pt_state (the pass-through's link to the backend). */
+#define BLKIF_STATE_CLOSED       0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED    2
+
+/* Printable names for the BLKIF_STATE_* values, indexed by state. */
+static char *blkif_state_name[] = {
+    [BLKIF_STATE_CLOSED]       = "closed",
+    [BLKIF_STATE_DISCONNECTED] = "disconnected",
+    [BLKIF_STATE_CONNECTED]    = "connected",
+};
+
+/* Printable names for BLKIF_INTERFACE_STATUS_* values, indexed by status. */
+static char * blkif_status_name[] = {
+    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
+    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
+    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
+};
+static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED; /* current state */
+static unsigned blkif_ptbe_irq;  /* irq bound to blkif_ptbe_evtchn */
+unsigned int blkif_ptbe_evtchn;  /* event channel taken from the status msg */
+
+/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+
+
+/*
+ * Handle a BE_CREATE control message: wipe the single pass-through
+ * front-end record, fill in the domain id and handle from the message,
+ * mark it DISCONNECTED and report OKAY back in create->status.
+ */
+void blkif_ptfe_create(blkif_be_create_t *create)
+{
+    /* Only one interface exists; kept as a pointer in case a lookup
+     * table is reintroduced later. */
+    blkif_t *fe = &ptfe_blkif;
+
+    DPRINTK("PT got BE_CREATE\n");
+
+    /* blkif struct init code from blkback.c */
+    memset(fe, 0, sizeof(*fe));
+    fe->domid  = create->domid;
+    fe->handle = create->blkif_handle;
+    fe->status = DISCONNECTED;
+    spin_lock_init(&fe->blk_ring_lock);
+    atomic_set(&fe->refcnt, 0);
+
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+/*
+ * Handle a BE_DESTROY control message.  Currently a stub: nothing is torn
+ * down, the message is simply acknowledged with BLKIF_BE_STATUS_OKAY.
+ */
+void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
+{
+    /* Clear anything that we initialized above. */
+
+    DPRINTK("PT got BE_DESTROY\n");
+    destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * Handle a BE_CONNECT control message: map the front end's shared ring
+ * page into kernel virtual address space, bind its event channel to an
+ * irq and install blkif_ptfe_int as the handler.  The outcome is reported
+ * through connect->status; on any failure the mapping area is released
+ * and the interface is left DISCONNECTED.
+ */
+void blkif_ptfe_connect(blkif_be_connect_t *connect)
+{
+    domid_t       domid  = connect->domid;
+    /*unsigned int  handle = connect->blkif_handle;*/
+    unsigned int  evtchn = connect->evtchn;
+    unsigned long shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t      prot;
+    int           error;
+    blkif_t      *blkif;
+
+    DPRINTK("PT got BE_CONNECT\n");
+
+    blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        WPRINTK("BE_CONNECT: error! (%d)\n", error);
+        if ( error == -ENOMEM ) 
+            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT ) {
+            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+            WPRINTK("BE_CONNECT: MAPPING error!\n");
+        }
+        else
+            connect->status = BLKIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    if ( blkif->status != DISCONNECTED )
+    {
+        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    /* Fill in the record before the handler can run. */
+    blkif->evtchn        = evtchn;
+    blkif->irq           = bind_evtchn_to_irq(evtchn);
+    blkif->shmem_frame   = shmem_frame;
+    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+    blkif->status        = CONNECTED;
+    /*blkif_get(blkif);*/
+
+    error = request_irq(blkif->irq, blkif_ptfe_int, 0,
+                        "blkif-pt-backend", blkif);
+    if ( error != 0 )
+    {
+        /* No handler was installed: undo the binding and the mapping and
+         * report failure rather than claiming to be connected. */
+        WPRINTK("BE_CONNECT: request_irq failed (%d)\n", error);
+        unbind_evtchn_from_irq(evtchn);
+        blkif->status        = DISCONNECTED;
+        blkif->blk_ring_base = NULL;
+        vfree(vma->addr);
+        connect->status = BLKIF_BE_STATUS_ERROR;
+        return;
+    }
+
+    connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * Handle a BE_DISCONNECT control message.
+ *
+ * The pass-through is only a pipe, so it does not mark itself
+ * disconnected; recovery is left to the real front and back ends.  The
+ * message is just acknowledged.
+ */
+void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
+{
+    DPRINTK("PT got BE_DISCONNECT\n");
+    disconnect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*-----[ Control Messages to/from Backend VM ]----------------------------*/
+
+/* Tell the controller to bring up the interface. */
+static void blkif_ptbe_send_interface_connect(void)
+{
+    /* FE_INTERFACE_CONNECT message; payload is filled in through 'msg'. */
+    ctrl_msg_t cmsg = {
+        .type    = CMSG_BLKIF_FE,
+        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+        .length  = sizeof(blkif_fe_interface_connect_t),
+    };
+    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+    msg->handle      = 0;
+    /* Machine frame number of the ring page allocated for the backend. */
+    msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
+    
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+/* Placeholder for closing the backend link; the body is currently empty. */
+static void blkif_ptbe_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+/*
+ * Allocate and reset the ring page shared with the backend, enter the
+ * DISCONNECTED state and ask the controller to connect the interface.
+ * If no page can be allocated the state is left unchanged.
+ */
+static void blkif_ptbe_disconnect(void)
+{
+    blkif_ring_t *ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+
+    /* The allocation can fail; do not dereference a NULL ring. */
+    if ( ring == NULL )
+    {
+        WPRINTK("Blkif-Passthrough-BE: no memory for ring page.\n");
+        return;
+    }
+
+    blk_ptbe_ring = ring;
+    blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod 
+                            = ptbe_resp_cons = ptbe_req_prod = 0;
+    blkif_pt_state  = BLKIF_STATE_DISCONNECTED;
+    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
+    blkif_ptbe_send_interface_connect();
+}
+
+/*
+ * Bind the event channel named in the status message to an irq and
+ * install blkif_ptbe_int on it.  The state becomes CONNECTED only if the
+ * handler was installed.
+ */
+static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
+{
+    int rc;
+
+    blkif_ptbe_evtchn = status->evtchn;
+    blkif_ptbe_irq    = bind_evtchn_to_irq(blkif_ptbe_evtchn);
+
+    rc = request_irq(blkif_ptbe_irq, blkif_ptbe_int,
+                     SA_SAMPLE_RANDOM, "blkif", NULL);
+    if ( rc != 0 )
+    {
+        WPRINTK("blkfront request_irq failed (%d)\n", rc);
+        return;
+    }
+
+    /* Transition to connected in case we need to do a partition probe
+     * on a whole disk. */
+    blkif_pt_state = BLKIF_STATE_CONNECTED;
+}
+
+/* Log a status report that has no handler in the current state. */
+static void unexpected(blkif_fe_interface_status_t *status)
+{
+    char *reported = blkif_status_name[status->status];
+    char *current_state = blkif_state_name[blkif_pt_state];
+
+    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
+           reported, current_state);
+}
+
+/*
+ * React to an interface-status report for the pass-through's link to the
+ * backend.  The action taken depends on both the reported status and the
+ * current blkif_pt_state; combinations with no real handler are logged
+ * through unexpected().  Only handle 0 is accepted.
+ */
+static void blkif_ptbe_status(
+    blkif_fe_interface_status_t *status)
+{
+    if ( status->handle != 0 )
+    {
+        DPRINTK("Status change on unsupported blkif %d\n",
+               status->handle);
+        return;
+    }
+
+    /* NOTE(review): status->status indexes blkif_status_name here, before
+     * the switch below has range-checked it -- confirm this is safe when
+     * DPRINTK is enabled. */
+    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
+    
+    switch ( status->status )
+    {
+    case BLKIF_INTERFACE_STATUS_CLOSED:
+        switch ( blkif_pt_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            unexpected(status);
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+        case BLKIF_STATE_CONNECTED:
+            unexpected(status);
+            blkif_ptbe_close();
+            break;
+        }
+        break;
+        
+    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+        switch ( blkif_pt_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            /* Normal bring-up path: allocate the ring and ask to connect. */
+            blkif_ptbe_disconnect();
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+        case BLKIF_STATE_CONNECTED:
+            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
+            unexpected(status);
+            break;
+        }
+        break;
+        
+    case BLKIF_INTERFACE_STATUS_CONNECTED:
+        switch ( blkif_pt_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            /* Skipped a step: run the disconnect step first, then connect. */
+            unexpected(status);
+            blkif_ptbe_disconnect();
+            blkif_ptbe_connect(status);
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+            blkif_ptbe_connect(status);
+            break;
+        case BLKIF_STATE_CONNECTED:
+            unexpected(status);
+            blkif_ptbe_connect(status);
+            break;
+        }
+        break;
+
+   case BLKIF_INTERFACE_STATUS_CHANGED:
+        switch ( blkif_pt_state )
+        {
+        case BLKIF_STATE_CLOSED:
+        case BLKIF_STATE_DISCONNECTED:
+            unexpected(status);
+            break;
+        case BLKIF_STATE_CONNECTED:
+            /* vbd_update(); */
+            /* tap doesn't really get state changes... */
+            unexpected(status);
+            break;
+        }
+       break;
+       
+    default:
+        DPRINTK("Status change to unknown value %d\n", status->status);
+        break;
+    }
+}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+/*
+ * Entry point for every block-interface control message.
+ *
+ * Dispatches on msg->type and msg->subtype, checking that msg->length
+ * matches the expected payload structure before handing the payload to
+ * the handler.  A response is always sent; for an unrecognised subtype
+ * or a length mismatch the response carries length 0.
+ */
+void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->type )
+    {
+    case CMSG_BLKIF_FE:
+
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_FE_INTERFACE_STATUS:
+            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+                goto parse_error;
+            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
+            break;        
+
+        default:
+            goto parse_error;
+        }
+        /* Do not fall through: the BE case below would re-interpret the
+         * FE subtype as a BE subtype. */
+        break;
+
+    case CMSG_BLKIF_BE:
+        
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_BE_CREATE:
+            if ( msg->length != sizeof(blkif_be_create_t) )
+                goto parse_error;
+            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
+            break; 
+        case CMSG_BLKIF_BE_DESTROY:
+            if ( msg->length != sizeof(blkif_be_destroy_t) )
+                goto parse_error;
+            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+            break;        
+        case CMSG_BLKIF_BE_CONNECT:
+            if ( msg->length != sizeof(blkif_be_connect_t) )
+                goto parse_error;
+            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
+            break;        
+        case CMSG_BLKIF_BE_DISCONNECT:
+            if ( msg->length != sizeof(blkif_be_disconnect_t) )
+                goto parse_error;
+            blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
+            break;        
+
+        /* We just ignore anything to do with vbds for now. */
+        
+        case CMSG_BLKIF_BE_VBD_CREATE:
+            DPRINTK("PT got VBD_CREATE\n");
+            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_DESTROY:
+            DPRINTK("PT got VBD_DESTROY\n");
+            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_GROW:
+            DPRINTK("PT got VBD_GROW\n");
+            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_SHRINK:
+            DPRINTK("PT got VBD_SHRINK\n");
+            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        default:
+            goto parse_error;
+        }
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    msg->length = 0;
+    ctrl_if_send_response(msg);
+}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
new file mode 100644 (file)
index 0000000..c8733dc
--- /dev/null
@@ -0,0 +1,517 @@
+/******************************************************************************
+ * blktap_datapath.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+#include "blktap.h"
+
+/*-----[ The data paths ]-------------------------------------------------*/
+/* Connections to the frontend domains.*/
+blkif_t   ptfe_blkif; 
+/* Connection to a single backend domain. */
+blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
+BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
+BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
+
+/* Rings up to user space. */ 
+blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
+blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+/*
+ * Return non-zero when a request-type ring has BLKIF_RING_SIZE entries
+ * outstanding.  Any other ring type always reports "not full".
+ */
+inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
+{
+    blkif_req_ring_t *req_ring;
+
+    /* for now assume that there is always room in the response path. */
+    if (ring->type != BLKIF_REQ_RING_TYPE)
+        return 0;
+
+    req_ring = (blkif_req_ring_t *)ring;
+    return ( ( req_ring->req_prod - req_ring->rsp_cons ) == BLKIF_RING_SIZE );
+}
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+/* (must stay a power of two: MASK_ACTIVE_IDX masks with MAX_ACTIVE_REQS-1) */
+#define MAX_ACTIVE_REQS 64
+
+active_req_t  active_reqs[MAX_ACTIVE_REQS];      /* request records       */
+unsigned char active_req_ring[MAX_ACTIVE_REQS];  /* ring of free indices  */
+spinlock_t    active_req_lock = SPIN_LOCK_UNLOCKED;
+typedef unsigned int ACTIVE_RING_IDX;
+ACTIVE_RING_IDX active_prod, active_cons;
+/* Wrap a free-running ring counter into the range of active_req_ring[]. */
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+/* Index of record _ar within active_reqs[]; argument parenthesised so any
+ * pointer expression can be passed. */
+#define ACTIVE_IDX(_ar) ((_ar) - active_reqs)
+
+/*
+ * Take a free request record.
+ *
+ * Free records are tracked as indices in active_req_ring[], filled by
+ * free_active_req().  The next index must be read from that ring:
+ * indexing active_reqs[] with the consumer counter directly would hand
+ * out records that are still in use once records have been freed out of
+ * order.  The same lock as free_active_req() protects the ring.
+ */
+inline active_req_t *get_active_req(void) 
+{
+    active_req_t *ar;
+    unsigned long flags;
+
+    ASSERT(active_cons != active_prod);    
+
+    spin_lock_irqsave(&active_req_lock, flags);
+    ar = &active_reqs[active_req_ring[MASK_ACTIVE_IDX(active_cons++)]];
+    spin_unlock_irqrestore(&active_req_lock, flags);
+
+    return ar;
+}
+
+/* Return a request record to the free ring, under active_req_lock. */
+inline void free_active_req(active_req_t *ar) 
+{
+    unsigned long irq_flags;
+    ACTIVE_RING_IDX slot;
+
+    spin_lock_irqsave(&active_req_lock, irq_flags);
+    slot = MASK_ACTIVE_IDX(active_prod);
+    active_prod++;
+    active_req_ring[slot] = ACTIVE_IDX(ar);
+    spin_unlock_irqrestore(&active_req_lock, irq_flags);
+}
+
+/*
+ * Reset the request-record pool: zero every record and list every index
+ * in the free ring, so that all MAX_ACTIVE_REQS records are available.
+ */
+inline void active_reqs_init(void)
+{
+    ACTIVE_RING_IDX slot;
+
+    memset(active_reqs, 0, sizeof(active_reqs));
+
+    for ( slot = 0; slot < MAX_ACTIVE_REQS; slot++ )
+        active_req_ring[slot] = slot;
+
+    active_cons = 0;
+    active_prod = MAX_ACTIVE_REQS;
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*
+ * Interrupt handler for the front-end event channel.
+ *
+ * Consumes every new request on the front-end ring.  Each request gets an
+ * active-request record and has its id replaced by that record's index.
+ * Depending on blktap_mode the request is then copied to the user-space
+ * front-end ring, to the backend ring, or both, and the receiving side is
+ * notified once after the loop.  Runs under blkif_io_lock.
+ */
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    /* we have pending messages from the real frontend. */
+
+    blkif_request_t *req_s, *req_d;
+    BLKIF_RING_IDX fe_rp;
+    unsigned long flags;
+    int notify;
+    unsigned long i;
+    active_req_t *ar;
+    
+    DPRINTK("PT got FE interrupt.\n");
+    
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    /* While there are REQUESTS on FERing: */
+    fe_rp = ptfe_blkif.blk_ring_base->req_prod;
+    rmb();
+    notify = (ptfe_blkif.blk_req_cons != fe_rp);
+
+    for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
+
+        /* Get the next request */
+        req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
+        
+        /* This is a new request:  
+         * Assign an active request record, and remap the id. 
+         */
+        ar = get_active_req();
+        ar->id = req_s->id;
+        req_s->id = ACTIVE_IDX(ar);
+        DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
+
+        /* FE -> BE interposition point is here. */
+        
+        /* ------------------------------------------------------------- */
+        /* BLKIF_OP_PROBE_HACK:                                          */
+        /* Until we have grant tables, we need to allow the backend to   */
+        /* map pages that are either from this domain, or more commonly  */
+        /* from the real front end.  We achieve this in a terrible way,  */
+        /* by passing the front end's domid along with PROBE messages.   */
+        /* Once grant tables appear, this should all go away.            */
+
+        if (req_s->operation == BLKIF_OP_PROBE) {
+            DPRINTK("Adding FE domid to PROBE request.\n");
+            /* Plain assignment: a cast on the left-hand side is a GCC
+             * extension that newer compilers reject. */
+            req_s->frame_and_sects[1] = ptfe_blkif.domid;
+        }
+
+        /* ------------------------------------------------------------- */
+
+        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+            
+            /* Copy the request message to UFERing */
+            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+            /* XXX: mapping/copying of attached pages is still not done! */
+
+            DPRINTK("req->UFERing\n"); 
+            blktap_write_fe_ring(req_s);
+
+
+        }
+
+        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+            
+            /* be included to prevent noise from the fe when its off */
+            /* copy the request message to the BERing */
+
+            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
+                    (unsigned)MASK_BLKIF_IDX(i), 
+                    (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
+
+            req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
+            
+            memcpy(req_d, req_s, sizeof(blkif_request_t));
+
+            ptbe_req_prod++;
+        }
+    }
+
+    ptfe_blkif.blk_req_cons = i;
+
+    /* If we have forwarded any requests, notify the appropriate ends. */
+    if (notify) {
+
+        /* we have sent stuff to the be, notify it. */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+            wmb();
+            blk_ptbe_ring->req_prod = ptbe_req_prod;
+
+            notify_via_evtchn(blkif_ptbe_evtchn);
+            DPRINTK(" -- and notified.\n");
+        }
+
+        /* we sent stuff to the app, notify it. */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+            blktap_kick_user();
+        }
+    }
+
+    /* unlock rings */
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+    return IRQ_HANDLED;
+}
+
+/*
+ * Copy one request into the next slot of the backend ring and advance
+ * the private producer index.  The shared producer index is not updated
+ * here.  Always returns 0.
+ */
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *slot =
+        &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
+
+    memcpy(slot, req, sizeof(*slot));
+    ptbe_req_prod++;
+
+    return 0;
+}
+
+/* Publish the private request producer index on the backend ring and
+ * notify the backend over its event channel. */
+inline void kick_be_domain(void) {
+    wmb(); /* write barrier before the producer index is published */
+    blk_ptbe_ring->req_prod = ptbe_req_prod;
+    notify_via_evtchn(blkif_ptbe_evtchn);
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+/*
+ * Interrupt handler for the backend event channel.
+ *
+ * Consumes every new response on the backend ring.  Depending on
+ * blktap_mode each response is copied to the user-space backend ring
+ * and/or forwarded to the front-end ring (after restoring the original
+ * request id and freeing the active-request record).  The receiving side
+ * is notified once after the loop.  Runs under blkif_io_lock.
+ */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+                                  struct pt_regs *ptregs)
+{
+    blkif_response_t  *resp_s, *resp_d;
+    BLKIF_RING_IDX be_rp;
+    unsigned long flags;
+    int notify;
+    unsigned long i;
+    active_req_t *ar;
+
+    DPRINTK("PT got BE interrupt.\n");
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+    
+    /* While there are RESPONSES on BERing: */
+    be_rp = blk_ptbe_ring->resp_prod;
+    rmb();
+    notify = (ptbe_resp_cons != be_rp);
+    
+    for ( i = ptbe_resp_cons; i != be_rp; i++ )
+    {
+        /* BE -> FE interposition point is here. */
+        
+        /* Get the next response */
+        resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
+    
+       
+        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+            /* Copy the response message to UBERing */
+            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+            /* XXX: copy/map the attached page! */
+
+            DPRINTK("rsp->UBERing\n"); 
+            blktap_write_be_ring(resp_s);
+
+        }
+       
+        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+            
+            /* (fe included to prevent random interference from the BE) */
+            /* Copy the response message to FERing */
+         
+            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
+                    (unsigned) MASK_BLKIF_IDX(i), 
+                    (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
+
+            /* remap id, and free the active req. blkif lookup goes here too.*/
+            /* NOTE(review): resp_s->id is read from the shared backend ring
+             * and indexes active_reqs[] without a range check -- confirm
+             * the backend is trusted to echo ids unchanged. */
+            ar = &active_reqs[resp_s->id];
+            DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
+            resp_s->id = ar->id;
+            free_active_req(ar);
+           
+            resp_d = &ptfe_blkif.blk_ring_base->ring[
+                MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
+
+            memcpy(resp_d, resp_s, sizeof(blkif_response_t));
+            
+            ptfe_blkif.blk_resp_prod++;
+
+        }
+    }
+
+    ptbe_resp_cons = i;
+    
+    /* If we have forwarded any responses, notify the appropriate domains. */
+    if (notify) {
+
+        /* we have sent stuff to the fe.  notify it. */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+            wmb();
+            ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
+        
+            notify_via_evtchn(ptfe_blkif.evtchn);
+            DPRINTK(" -- and notified.\n");
+        }
+
+        /* we sent stuff to the app, notify it. */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+            blktap_kick_user();
+        }
+    }
+
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+    return IRQ_HANDLED;
+}
+
+/*
+ * Forward one response to the front-end ring.
+ *
+ * rsp->id is the active-request index that was substituted when the
+ * request was accepted; it is mapped back to the front end's original id
+ * and the record is freed.  An id outside active_reqs[] is rejected,
+ * since the response may have been written by the user-space application.
+ *
+ * Returns 0 on success, -1 if the id is out of range (nothing is written).
+ */
+inline int write_resp_to_fe_ring(blkif_response_t *rsp)
+{
+    blkif_response_t *resp_d;
+    active_req_t *ar;
+    
+    if ( rsp->id >= MAX_ACTIVE_REQS ) {
+        WPRINTK("blktap: dropping response with bad id %lu\n", rsp->id);
+        return -1;
+    }
+
+    /* remap id, and free the active req. blkif lookup goes here too.*/
+    ar = &active_reqs[rsp->id];
+    DPRINTK("%3lu > %3lu\n", rsp->id, ar->id);
+    rsp->id = ar->id;
+    free_active_req(ar);
+            
+    resp_d = &ptfe_blkif.blk_ring_base->ring[
+        MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
+
+    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+    ptfe_blkif.blk_resp_prod++;
+
+    return 0;
+}
+
+/* Publish the private response producer index on the front-end ring and
+ * notify the front end over its event channel. */
+inline void kick_fe_domain(void) {
+    wmb(); /* write barrier before the producer index is published */
+    ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
+    notify_via_evtchn(ptfe_blkif.evtchn);
+    
+}
+
+/* Publish queued requests on the backend ring and notify the backend.
+ * Performs the same steps as kick_be_domain(). */
+static inline void flush_requests(void)
+{
+    wmb(); /* Ensure that the backend can see the requests. */
+    blk_ptbe_ring->req_prod = ptbe_req_prod;
+    notify_via_evtchn(blkif_ptbe_evtchn);
+}
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+/*
+ * Pass a request from the real front-end domain's ring to the character
+ * device: copy it into the user-space front-end ring and map each of its
+ * segments into the application's vma.
+ *
+ * The request is dropped (ring producer not advanced) if the user ring is
+ * not mapped yet, is full, or a segment fails to map.  Always returns 0.
+ */
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int error, i;
+
+    /*
+     * This is called to pass a request from the real frontend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: fe_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
+        DPRINTK("blktap: fe_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
+    memcpy(target, req, sizeof(*req));
+
+/* maybe move this stuff out into a separate func ------------------- */
+
+    /*
+     * For now, map attached page into a fixed position into the vma.
+     * XXX: make this map to a free page.
+     */
+
+    /* Attempt to map the foreign pages directly in to the application */
+    for (i=0; i<target->nr_segments; i++) {
+
+        /* get an unused virtual address from the char device */
+        /* store the old page address */
+        /* replace the address with the virtual address */
+
+        /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */
+
+        /* Map segment i's own frame; using entry 0 every time would map
+         * the first segment's page at every segment address. */
+        error = direct_remap_area_pages(blktap_vma->vm_mm, 
+                                        MMAP_VADDR(req->id, i), 
+                                        target->frame_and_sects[i] & PAGE_MASK,
+                                        PAGE_SIZE,
+                                        blktap_vma->vm_page_prot,
+                                        ptfe_blkif.domid);
+        if ( error != 0 ) {
+            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+            return 0;
+        }
+    }
+    /* fix the address of the attached page in the message. */
+    /* TODO:      preserve the segment number stuff here... */
+    /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/
+/* ------------------------------------------------------------------ */
+
+    
+    fe_ring.req_prod++;
+
+    return 0;
+}
+
+/*
+ * Copy one backend response into the user-space backend ring and advance
+ * the private response producer.  The response is dropped if the user
+ * ring is not mapped yet.  Always returns 0.
+ */
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+    blkif_response_t *target;
+
+    /*
+     * This is called to pass a response from the real backend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
+        DPRINTK("blktap: be_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
+    memcpy(target, rsp, sizeof(*rsp));
+
+
+    /* XXX: map attached pages and fix-up addresses in the copied address. */
+
+    be_ring.rsp_prod++;
+
+    return 0;
+}
+
+/*
+ * Consume responses that the application placed on the user-space
+ * front-end ring.  Only acts in MODE_INTERCEPT_FE, where each response is
+ * forwarded to the real front-end ring and the front end is notified.
+ * Always returns 0.
+ */
+int blktap_read_fe_ring(void)
+{
+    /* This is called to read responses from the UFE ring. */
+
+    BLKIF_RING_IDX fe_rp;
+    unsigned long i;
+    int notify;
+
+    DPRINTK("blktap_read_fe_ring()\n");
+
+    fe_rp = fe_ring.ring->resp_prod;
+    rmb();
+    notify = (fe_rp != fe_ring.rsp_cons);
+
+    /* if we are forwarding from UFERing to FERing */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+        /* for each outstanding message on the UFERing  */
+        for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) {
+
+            /* XXX: remap pages on that message as necessary */
+            /* copy the message to the FERing */
+
+            DPRINTK("resp->fe_ring\n");
+            write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp);
+        }
+    
+        fe_ring.rsp_cons = fe_rp;
+
+        /* notify the fe if necessary */
+        if ( notify ) {
+            DPRINTK("kick_fe_domain()\n");
+            kick_fe_domain();
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Consume requests that the application placed on the user-space backend
+ * ring.  Only acts in MODE_INTERCEPT_BE, where each request is forwarded
+ * to the real backend ring and the backend is notified.  Always returns 0.
+ */
+int blktap_read_be_ring(void)
+{
+    /* This is called to read requests from the UBE ring. */
+
+    BLKIF_RING_IDX be_rp;
+    unsigned long i;
+    int notify;
+
+    DPRINTK("blktap_read_be_ring()\n");
+
+    be_rp = be_ring.ring->req_prod;
+    rmb();
+    notify = (be_rp != be_ring.req_cons);
+
+    /* if we are forwarding from UBERing to BERing */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+        /* for each outstanding message on the UBERing  */
+        for ( i = be_ring.req_cons; i != be_rp; i++ ) {
+
+            /* XXX: remap pages on that message as necessary */
+            /* copy the message to the BERing */
+
+            DPRINTK("req->be_ring\n");
+            write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req);
+        }
+    
+        be_ring.req_cons = be_rp;
+
+        /* notify the be if necessary */
+        if ( notify ) {
+            DPRINTK("kick_be_domain()\n");
+            kick_be_domain();
+        }
+    }
+
+    return 0;
+}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
new file mode 100644 (file)
index 0000000..c10e3f3
--- /dev/null
@@ -0,0 +1,243 @@
+/******************************************************************************
+ * blktap_userdev.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+/*
+ * nopage handler for the blktap mapping.  Any fault here means the page
+ * was never mapped in by the driver, so the faulting task is sent SIGBUS
+ * and no page is returned.
+ */
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                  unsigned long address,
+                                  int *type)
+{
+    force_sig(SIGBUS, current);
+    return NULL;
+}
+
+/* vm operations installed on the mapping by blktap_mmap(). */
+struct vm_operations_struct blktap_vm_ops = {
+    .nopage = blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+/*
+ * Open the blktap device.  Only one opener is allowed at a time.
+ *
+ * Allocates one zeroed, reserved page for each of the two user-space
+ * rings and resets their indices.
+ *
+ * Returns 0 on success, -EBUSY if the device is already open, or -ENOMEM
+ * if a ring page cannot be allocated.  On -ENOMEM everything allocated so
+ * far is released and the in-use flag is dropped, so a later open can
+ * succeed.
+ */
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+        return -EBUSY;
+
+    printk(KERN_ALERT "blktap open.\n");
+
+    /* Allocate the fe ring. */
+    fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
+    if (fe_ring.ring == NULL)
+        goto fail_nomem;
+
+    SetPageReserved(virt_to_page(fe_ring.ring));
+    
+    fe_ring.ring->req_prod = fe_ring.ring->resp_prod
+                           = fe_ring.req_prod
+                           = fe_ring.rsp_cons
+                           = 0;
+
+    /* Allocate the be ring. */
+    be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
+    if (be_ring.ring == NULL)
+        goto fail_free_fe;
+
+    SetPageReserved(virt_to_page(be_ring.ring));
+    
+    be_ring.ring->req_prod = be_ring.ring->resp_prod
+                           = be_ring.rsp_prod
+                           = be_ring.req_cons
+                           = 0;
+
+    DPRINTK(KERN_ALERT "blktap open.\n");
+
+    return 0;
+
+ fail_free_fe:
+    /* Undo SetPageReserved before handing the page back. */
+    ClearPageReserved(virt_to_page(fe_ring.ring));
+    free_page( (unsigned long) fe_ring.ring);
+    fe_ring.ring = NULL;
+
+ fail_nomem:
+    /* Release the single-opener claim taken at the top. */
+    blktap_dev_inuse = 0;
+    return -ENOMEM;
+}
+
+/*
+ * Close the blktap device: mark the user rings unusable, free both ring
+ * pages, and only then allow another opener.  Clearing the in-use flag
+ * last prevents a concurrent open from allocating new rings that this
+ * function would then free.  Always returns 0.
+ */
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+    blktap_ring_ok = 0;
+
+    printk(KERN_ALERT "blktap closed.\n");
+
+    /* Free the ring pages. */
+    ClearPageReserved(virt_to_page(fe_ring.ring));
+    free_page((unsigned long) fe_ring.ring);
+
+    ClearPageReserved(virt_to_page(be_ring.ring));
+    free_page((unsigned long) be_ring.ring);
+
+    /* Rings are gone; the device may be opened again. */
+    blktap_dev_inuse = 0;
+    
+    return 0;
+}
+
+/*
+ * Map the device into the application.
+ *
+ * The mapping must be exactly MMAP_PAGES + RING_PAGES pages long.  The
+ * be ring page is mapped at the start of the region and the fe ring page
+ * immediately after it; the remainder is left for per-request data pages.
+ *
+ * Returns 0 on success, or -EAGAIN if the size is wrong or a ring page
+ * cannot be mapped.  The rings are marked usable only on success.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    int size;
+
+    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+           vma->vm_start, vma->vm_end);
+
+    vma->vm_ops = &blktap_vm_ops;
+
+    size = vma->vm_end - vma->vm_start;
+    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+        printk(KERN_INFO 
+               "blktap: you _must_ map exactly %d pages!\n",
+               MMAP_PAGES + RING_PAGES);
+        return -EAGAIN;
+    }
+
+    size >>= PAGE_SHIFT;
+    /* Report the pages left over after the ring pages. */
+    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size - RING_PAGES);
+    
+    rings_vstart = vma->vm_start;
+    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    
+    /* Map the ring pages to the start of the region and reserve it. */
+
+    /* not sure if I really need to do this... */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+    DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
+    if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, 
+                         vma->vm_page_prot)) {
+        printk(KERN_ERR "be_ring: remap_page_range failure!\n");
+        return -EAGAIN;
+    }
+
+    DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
+    if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), 
+                         PAGE_SIZE, vma->vm_page_prot)) {
+        printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
+        return -EAGAIN;
+    }
+
+    /* Both rings are mapped: let the data path start using them. */
+    blktap_vma = vma;
+    blktap_ring_ok = 1;
+
+    return 0;
+}
+
+/*
+ * ioctl entry point.
+ *
+ *   BLKTAP_IOCTL_KICK_FE  process messages on the user front-end ring
+ *   BLKTAP_IOCTL_KICK_BE  process messages on the user backend ring
+ *   BLKTAP_IOCTL_SETMODE  set blktap_mode to arg, if arg is a valid mode
+ *
+ * Anything else, including SETMODE with an invalid mode, returns
+ * -ENOIOCTLCMD.
+ */
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    /* There are fe messages to process. */
+    if (cmd == BLKTAP_IOCTL_KICK_FE)
+        return blktap_read_fe_ring();
+
+    /* There are be messages to process. */
+    if (cmd == BLKTAP_IOCTL_KICK_BE)
+        return blktap_read_be_ring();
+
+    if (cmd == BLKTAP_IOCTL_SETMODE && BLKTAP_MODE_VALID(arg)) {
+        blktap_mode = arg;
+        /* XXX: may need to flush rings here. */
+        printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+        return 0;
+    }
+
+    /* XXX: return a more meaningful error case for a bad mode. */
+    return -ENOIOCTLCMD;
+}
+
+/*
+ * poll entry point.  If either private producer index is ahead of the
+ * value in its shared ring page, both are published and the device is
+ * reported readable; otherwise nothing is reported.
+ */
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+    poll_wait(file, &blktap_wait, wait);
+
+    /* Nothing new on either ring. */
+    if ( (fe_ring.req_prod == fe_ring.ring->req_prod) &&
+         (be_ring.rsp_prod == be_ring.ring->resp_prod) )
+        return 0;
+
+    fe_ring.ring->req_prod  = fe_ring.req_prod;
+    be_ring.ring->resp_prod = be_ring.rsp_prod;
+
+    return POLLIN | POLLRDNORM;
+}
+
+/* Wake any task sleeping on blktap_wait (the queue blktap_poll waits on). */
+void blktap_kick_user(void)
+{
+    /* blktap_ring->req_prod = blktap_req_prod; */
+    wake_up_interruptible(&blktap_wait);
+}
+
+/* File operations of the blktap character device. */
+static struct file_operations blktap_fops = {
+    .owner   = THIS_MODULE,
+    .poll    = blktap_poll,
+    .ioctl   = blktap_ioctl,
+    .open    = blktap_open,
+    .release = blktap_release,
+    .mmap    = blktap_mmap,
+};
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+/* Misc-device record registered by blktap_init(); routes to blktap_fops. */
+static struct miscdevice blktap_miscdev = {
+    .minor        = BLKTAP_MINOR,
+    .name         = "blktap",
+    .fops         = &blktap_fops,
+    .devfs_name   = "misc/blktap",
+};
+
+/*
+ * Register the blktap character device.
+ *
+ * The poll wait queue is initialised before registration, because the
+ * device can be opened and polled as soon as misc_register() succeeds.
+ *
+ * Returns 0 on success or the error from misc_register().
+ */
+int blktap_init(void)
+{
+    int err;
+
+    init_waitqueue_head(&blktap_wait);
+
+    err = misc_register(&blktap_miscdev);
+    if ( err != 0 )
+    {
+        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+        return err;
+    }
+
+    return 0;
+}
diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
new file mode 100644 (file)
index 0000000..421a81f
--- /dev/null
@@ -0,0 +1,59 @@
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them. */
+
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#if 1  /* active: the hook only logs that it was called */
+       printk("smpboot_clear_io_apic_irqs\n");
+#else  /* compiled out: would reset io_apic_irqs */
+       io_apic_irqs = 0;
+#endif
+}
+
+static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+{
+#if 1  /* active: start_eip is unused, the hook only logs its own name */
+       printk("smpboot_setup_warm_reset_vector\n");
+#else  /* compiled out: CMOS write plus trampoline vector setup */
+       CMOS_WRITE(0xa, 0xf);
+       local_flush_tlb();
+       Dprintk("1.\n");
+       *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+       Dprintk("2.\n");
+       *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+       Dprintk("3.\n");
+#endif
+}
+
+static inline void smpboot_restore_warm_reset_vector(void)
+{
+       /*
+        * NOTE(review): not stubbed out like the other hooks; confirm for Xen.
+        */
+       local_flush_tlb();
+
+       /*
+        * Paranoid: put the warm reset code (CMOS register 0xf) and the
+        * vector at physical 0x467 (BIOS data area) back to zero.
+        */
+       CMOS_WRITE(0, 0xf);
+
+       *((volatile long *) phys_to_virt(0x467)) = 0;
+}
+
+static inline void smpboot_setup_io_apic(void)
+{
+#if 1  /* active: the hook only logs that it was called */
+       printk("smpboot_setup_io_apic\n");
+#else  /* compiled out: IO-APIC setup */
+       /*
+        * Here we can be sure that there is an IO-APIC in the system. Let's
+        * go and set it up:
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+#endif
+}
+
+
+#define        smp_found_config        (HYPERVISOR_shared_info->n_vcpu > 1)
diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
new file mode 100644 (file)
index 0000000..fb8bd00
--- /dev/null
@@ -0,0 +1,224 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+asmlinkage int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 3f\n" \
+       "2:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0,%0\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       "3:\n\t"
+
+#define spin_lock_string_flags \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 4f\n\t" \
+       "2:\t" \
+       "testl $0x200, %1\n\t" \
+       "jz 3f\n\t" \
+       "#sti\n\t" \
+       "3:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0, %0\n\t" \
+       "jle 3b\n\t" \
+       "#cli\n\t" \
+       "jmp 1b\n" \
+       "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+       "movb $1,%0" \
+               :"=m" (lock->lock) : : "memory"
+
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#else
+
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->lock) \
+               :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#endif
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"          /* swap 0 into the lock, fetch the old value */
+               :"=q" (oldval), "=m" (lock->lock)
+               :"0" (0) : "memory");
+       return oldval > 0;              /* old value 1 (unlocked) means we took it */
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->lock) : : "memory");
+}
+
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));       /* report the caller */
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string_flags  /* NOTE(review): its sti/cli are commented out */
+               :"=m" (lock->lock) : "r" (flags) : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void _raw_read_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;     /* treat the rwlock as an atomic counter */
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))   /* reached 0: it was unlocked */
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);        /* failed: undo the subtraction */
+       return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
deleted file mode 100644 (file)
index 9fabbfe..0000000
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- *     Intel SMP support routines.
- *
- *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     This code is released under the GNU General Public License version 2 or
- *     later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/irq.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#if 0
-#include <mach_apic.h>
-#endif
-#include <asm-xen/evtchn.h>
-
-#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
-
-/*
- *     Some notes on x86 processor bugs affecting SMP operation:
- *
- *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- *     The Linux implications for SMP are handled as follows:
- *
- *     Pentium III / [Xeon]
- *             None of the E1AP-E3AP errata are visible to the user.
- *
- *     E1AP.   see PII A1AP
- *     E2AP.   see PII A2AP
- *     E3AP.   see PII A3AP
- *
- *     Pentium II / [Xeon]
- *             None of the A1AP-A3AP errata are visible to the user.
- *
- *     A1AP.   see PPro 1AP
- *     A2AP.   see PPro 2AP
- *     A3AP.   see PPro 7AP
- *
- *     Pentium Pro
- *             None of 1AP-9AP errata are visible to the normal user,
- *     except occasional delivery of 'spurious interrupt' as trap #15.
- *     This is very rare and a non-problem.
- *
- *     1AP.    Linux maps APIC as non-cacheable
- *     2AP.    worked around in hardware
- *     3AP.    fixed in C0 and above steppings microcode update.
- *             Linux does not use excessive STARTUP_IPIs.
- *     4AP.    worked around in hardware
- *     5AP.    symmetric IO mode (normal Linux operation) not affected.
- *             'noapic' mode has vector 0xf filled out properly.
- *     6AP.    'noapic' mode might be affected - fixed in later steppings
- *     7AP.    We do not assume writes to the LVT deassering IRQs
- *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
- *     9AP.    We do not use mixed mode
- *
- *     Pentium
- *             There is a marginal case where REP MOVS on 100MHz SMP
- *     machines with B stepping processors can fail. XXX should provide
- *     an L1cache=Writethrough or L1cache=off option.
- *
- *             B stepping CPUs may hang. There are hardware work arounds
- *     for this. We warn about it in case your board doesn't have the work
- *     arounds. Basically thats so I can tell anyone with a B stepping
- *     CPU and SMP problems "tough".
- *
- *     Specific items [From Pentium Processor Specification Update]
- *
- *     1AP.    Linux doesn't use remote read
- *     2AP.    Linux doesn't trust APIC errors
- *     3AP.    We work around this
- *     4AP.    Linux never generated 3 interrupts of the same priority
- *             to cause a lost local interrupt.
- *     5AP.    Remote read is never used
- *     6AP.    not affected - worked around in hardware
- *     7AP.    not affected - worked around in hardware
- *     8AP.    worked around in hardware - we get explicit CS errors if not
- *     9AP.    only 'noapic' mode affected. Might generate spurious
- *             interrupts, we log only the first one and count the
- *             rest silently.
- *     10AP.   not affected - worked around in hardware
- *     11AP.   Linux reads the APIC between writes to avoid this, as per
- *             the documentation. Make sure you preserve this as it affects
- *             the C stepping chips too.
- *     12AP.   not affected - worked around in hardware
- *     13AP.   not affected - worked around in hardware
- *     14AP.   we always deassert INIT during bootup
- *     15AP.   not affected - worked around in hardware
- *     16AP.   not affected - worked around in hardware
- *     17AP.   not affected - worked around in hardware
- *     18AP.   not affected - worked around in hardware
- *     19AP.   not affected - worked around in BIOS
- *
- *     If this sounds worrying believe me these bugs are either ___RARE___,
- *     or are signal timing bugs worked around in hardware and there's
- *     about nothing of note with C stepping upwards.
- */
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
-       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
-       return SET_APIC_DEST_FIELD(mask);
-}
-
-DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
-
-static inline void __send_IPI_one(unsigned int cpu, int vector)
-{
-       unsigned int evtchn;
-
-       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
-       // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
-       if (evtchn) {
-#if 0
-               shared_info_t *s = HYPERVISOR_shared_info;
-               while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
-                      synch_test_bit(evtchn, &s->evtchn_mask[0]))
-                       ;
-#endif
-               notify_via_evtchn(evtchn);
-       } else
-               printk("send_IPI to unbound port %d/%d",
-                      cpu, vector);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
-       int cpu;
-
-       switch (shortcut) {
-       case APIC_DEST_SELF:
-               __send_IPI_one(smp_processor_id(), vector);
-               break;
-       case APIC_DEST_ALLBUT:
-               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-                       if (cpu == smp_processor_id())
-                               continue;
-                       if (cpu_isset(cpu, cpu_online_map)) {
-                               __send_IPI_one(cpu, vector);
-                       }
-               }
-               break;
-       default:
-               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
-                      vector);
-               break;
-       }
-}
-
-void fastcall send_IPI_self(int vector)
-{
-       __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(cpumask_t mask, int vector)
-{
-       unsigned long flags;
-       unsigned int cpu;
-
-       local_irq_save(flags);
-
-       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-               if (cpu_isset(cpu, mask)) {
-                       __send_IPI_one(cpu, vector);
-               }
-       }
-
-       local_irq_restore(flags);
-}
-
-inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
-{
-
-       send_IPI_mask_bitmask(mask, vector);
-}
-
-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-
-/*
- *     Smarter SMP flushing macros. 
- *             c/o Linus Torvalds.
- *
- *     These mean you can really definitely utterly forget about
- *     writing to user space from interrupts. (Its not allowed anyway).
- *
- *     Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
-#define FLUSH_ALL      0xffffffff
-
-/*
- * We cannot call mmdrop() because we are in interrupt context, 
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-static inline void leave_mm (unsigned long cpu)
-{
-       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-               BUG();
-       cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
-       load_cr3(swapper_pg_dir);
-}
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *     Stop ipi delivery for the old mm. This is not synchronized with
- *     the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *     for the wrong mm, and in the worst case we perform a superflous
- *     tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *     was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *     Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *     Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *     flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *     Atomically set the bit [other cpus will start sending flush ipis],
- *     and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
-                                    struct pt_regs *regs)
-{
-       unsigned long cpu;
-
-       cpu = get_cpu();
-
-       if (!cpu_isset(cpu, flush_cpumask))
-               goto out;
-               /* 
-                * This was a BUG() but until someone can quote me the
-                * line from the intel manual that guarantees an IPI to
-                * multiple CPUs is retried _only_ on the erroring CPUs
-                * its staying as a return
-                *
-                * BUG();
-                */
-                
-       if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-               if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-                       if (flush_va == FLUSH_ALL)
-                               local_flush_tlb();
-                       else
-                               __flush_tlb_one(flush_va);
-               } else
-                       leave_mm(cpu);
-       }
-       smp_mb__before_clear_bit();
-       cpu_clear(cpu, flush_cpumask);
-       smp_mb__after_clear_bit();
-out:
-       put_cpu_no_resched();
-
-       return IRQ_HANDLED;
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-                                               unsigned long va)
-{
-       cpumask_t tmp;
-       /*
-        * A couple of (to be removed) sanity checks:
-        *
-        * - we do not send IPIs to not-yet booted CPUs.
-        * - current CPU must not be in mask
-        * - mask must exist :)
-        */
-       BUG_ON(cpus_empty(cpumask));
-
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(cpumask, tmp));
-       BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-       BUG_ON(!mm);
-
-       /*
-        * i'm not happy about this global shared spinlock in the
-        * MM hot path, but we'll see how contended it is.
-        * Temporarily this turns IRQs off, so that lockups are
-        * detected by the NMI watchdog.
-        */
-       spin_lock(&tlbstate_lock);
-       
-       flush_mm = mm;
-       flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
-       atomic_set_mask(cpumask, &flush_cpumask);
-#else
-       {
-               int k;
-               unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-               unsigned long *cpu_mask = (unsigned long *)&cpumask;
-               for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
-                       atomic_set_mask(cpu_mask[k], &flush_mask[k]);
-       }
-#endif
-       /*
-        * We have to send the IPI only to
-        * CPUs affected.
-        */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
-       while (!cpus_empty(flush_cpumask))
-               /* nothing. lockup detection does not belong here */
-               mb();
-
-       flush_mm = NULL;
-       flush_va = 0;
-       spin_unlock(&tlbstate_lock);
-}
-       
-void flush_tlb_current_task(void)
-{
-       struct mm_struct *mm = current->mm;
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       local_flush_tlb();
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-       preempt_enable();
-}
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       if (current->active_mm == mm) {
-               if (current->mm)
-                       local_flush_tlb();
-               else
-                       leave_mm(smp_processor_id());
-       }
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
-       preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       if (current->active_mm == mm) {
-               if(current->mm)
-                       __flush_tlb_one(va);
-               else
-                       leave_mm(smp_processor_id());
-       }
-
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, va);
-
-       preempt_enable();
-}
-
-static void do_flush_tlb_all(void* info)
-{
-       unsigned long cpu = smp_processor_id();
-
-       __flush_tlb_all();
-       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-               leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-void smp_send_reschedule(int cpu)
-{
-       send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
-
-struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t started;
-       atomic_t finished;
-       int wait;
-};
-
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                       int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-{
-       struct call_data_struct data;
-       int cpus = num_online_cpus()-1;
-
-       if (!cpus)
-               return 0;
-
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.started, 0);
-       data.wait = wait;
-       if (wait)
-               atomic_set(&data.finished, 0);
-
-       spin_lock(&call_lock);
-       call_data = &data;
-       mb();
-       
-       /* Send a message to all other CPUs and wait for them to respond */
-       send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-       /* Wait for response */
-       while (atomic_read(&data.started) != cpus)
-               barrier();
-
-       if (wait)
-               while (atomic_read(&data.finished) != cpus)
-                       barrier();
-       spin_unlock(&call_lock);
-
-       return 0;
-}
-
-static void stop_this_cpu (void * dummy)
-{
-       /*
-        * Remove this CPU:
-        */
-       cpu_clear(smp_processor_id(), cpu_online_map);
-       local_irq_disable();
-#if 1
-       xxprint("stop_this_cpu disable_local_APIC\n");
-#else
-       disable_local_APIC();
-#endif
-       if (cpu_data[smp_processor_id()].hlt_works_ok)
-               for(;;) __asm__("hlt");
-       for (;;);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-void smp_send_stop(void)
-{
-       smp_call_function(stop_this_cpu, NULL, 1, 0);
-
-       local_irq_disable();
-#if 1
-       xxprint("smp_send_stop disable_local_APIC\n");
-#else
-       disable_local_APIC();
-#endif
-       local_irq_enable();
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
-                                    struct pt_regs *regs)
-{
-
-       return IRQ_HANDLED;
-}
-
-#include <linux/kallsyms.h>
-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
-                                       struct pt_regs *regs)
-{
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       int wait = call_data->wait;
-
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       mb();
-       atomic_inc(&call_data->started);
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
-       irq_enter();
-       (*func)(info);
-       irq_exit();
-
-       if (wait) {
-               mb();
-               atomic_inc(&call_data->finished);
-       }
-
-       return IRQ_HANDLED;
-}
-
diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
deleted file mode 100644 (file)
index a9bb0e2..0000000
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*
- *     x86 SMP booting functions
- *
- *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     Much of the core SMP work is based on previous work by Thomas Radke, to
- *     whom a great many thanks are extended.
- *
- *     Thanks to Intel for making available several different Pentium,
- *     Pentium Pro and Pentium-II/Xeon MP machines.
- *     Original development of Linux SMP code supported by Caldera.
- *
- *     This code is released under the GNU General Public License version 2 or
- *     later.
- *
- *     Fixes
- *             Felix Koop      :       NR_CPUS used properly
- *             Jose Renau      :       Handle single CPU case.
- *             Alan Cox        :       By repeated request 8) - Total BogoMIPS report.
- *             Greg Wright     :       Fix for kernel stacks panic.
- *             Erich Boleyn    :       MP v1.4 and additional changes.
- *     Matthias Sattler        :       Changes for 2.1 kernel map.
- *     Michel Lespinasse       :       Changes for 2.1 kernel map.
- *     Michael Chastain        :       Change trampoline.S to gnu as.
- *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
- *             Ingo Molnar     :       Added APIC timers, based on code
- *                                     from Jose Renau
- *             Ingo Molnar     :       various cleanups and rewrites
- *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
- *             Martin J. Bligh :       Added support for multi-quad systems
- *             Dave Jones      :       Report invalid combinations of Athlon CPUs.
-*              Rusty Russell   :       Hacked into shape for new "hotplug" boot process. */
-
-#include <linux/module.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/irq.h>
-#include <linux/bootmem.h>
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-
-#if 1
-#define Dprintk(args...)
-#else
-#include <mach_apic.h>
-#endif
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
-
-/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
-
-/* bitmap of online cpus */
-cpumask_t cpu_online_map;
-
-static cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-u8 x86_cpu_to_apicid[NR_CPUS] =
-                       { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-#if 0
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end  [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __init setup_trampoline(void)
-{
-       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
-       return virt_to_phys(trampoline_base);
-}
-#endif
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-#if 1
-       int cpu;
-
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
-               cpu_gdt_descr[cpu].address = (unsigned long)
-                       alloc_bootmem_low_pages(PAGE_SIZE);
-               /* XXX free unused pages later */
-       }
-#else
-       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
-       /*
-        * Has to be in very low memory so we can execute
-        * real-mode AP code.
-        */
-       if (__pa(trampoline_base) >= 0x9F000)
-               BUG();
-       /*
-        * Make the SMP trampoline executable:
-        */
-       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-#endif
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __init smp_store_cpu_info(int id)
-{
-       struct cpuinfo_x86 *c = cpu_data + id;
-
-       *c = boot_cpu_data;
-       if (id!=0)
-               identify_cpu(c);
-       /*
-        * Mask B, Pentium, but not Pentium MMX
-        */
-       if (c->x86_vendor == X86_VENDOR_INTEL &&
-           c->x86 == 5 &&
-           c->x86_mask >= 1 && c->x86_mask <= 4 &&
-           c->x86_model <= 3)
-               /*
-                * Remember we have B step Pentia with bugs
-                */
-               smp_b_stepping = 1;
-
-       /*
-        * Certain Athlons might work (for various values of 'work') in SMP
-        * but they are not certified as MP capable.
-        */
-       if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
-               /* Athlon 660/661 is valid. */  
-               if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
-                       goto valid_k7;
-
-               /* Duron 670 is valid */
-               if ((c->x86_model==7) && (c->x86_mask==0))
-                       goto valid_k7;
-
-               /*
-                * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
-                * It's worth noting that the A5 stepping (662) of some Athlon XP's
-                * have the MP bit set.
-                * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
-                */
-               if (((c->x86_model==6) && (c->x86_mask>=2)) ||
-                   ((c->x86_model==7) && (c->x86_mask>=1)) ||
-                    (c->x86_model> 7))
-                       if (cpu_has_mp)
-                               goto valid_k7;
-
-               /* If we get here, it's not a certified SMP capable AMD system. */
-               tainted |= TAINT_UNSAFE_SMP;
-       }
-
-valid_k7:
-       ;
-}
-
-#if 0
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
-{
-       int i;
-       unsigned long long t0;
-       unsigned long long sum, avg;
-       long long delta;
-       unsigned long one_usec;
-       int buggy = 0;
-
-       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-       /* convert from kcyc/sec to cyc/usec */
-       one_usec = cpu_khz / 1000;
-
-       atomic_set(&tsc_start_flag, 1);
-       wmb();
-
-       /*
-        * We loop a few times to get a primed instruction cache,
-        * then the last pass is more or less synchronized and
-        * the BP and APs set their cycle counters to zero all at
-        * once. This reduces the chance of having random offsets
-        * between the processors, and guarantees that the maximum
-        * delay between the cycle counters is never bigger than
-        * the latency of information-passing (cachelines) between
-        * two CPUs.
-        */
-       for (i = 0; i < NR_LOOPS; i++) {
-               /*
-                * all APs synchronize but they loop on '== num_cpus'
-                */
-               while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
-                       mb();
-               atomic_set(&tsc_count_stop, 0);
-               wmb();
-               /*
-                * this lets the APs save their current TSC:
-                */
-               atomic_inc(&tsc_count_start);
-
-               rdtscll(tsc_values[smp_processor_id()]);
-               /*
-                * We clear the TSC in the last loop:
-                */
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               /*
-                * Wait for all APs to leave the synchronization point:
-                */
-               while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
-                       mb();
-               atomic_set(&tsc_count_start, 0);
-               wmb();
-               atomic_inc(&tsc_count_stop);
-       }
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_isset(i, cpu_callout_map)) {
-                       t0 = tsc_values[i];
-                       sum += t0;
-               }
-       }
-       avg = sum;
-       do_div(avg, num_booting_cpus());
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_isset(i, cpu_callout_map))
-                       continue;
-               delta = tsc_values[i] - avg;
-               if (delta < 0)
-                       delta = -delta;
-               /*
-                * We report bigger than 2 microseconds clock differences.
-                */
-               if (delta > 2*one_usec) {
-                       long realdelta;
-                       if (!buggy) {
-                               buggy = 1;
-                               printk("\n");
-                       }
-                       realdelta = delta;
-                       do_div(realdelta, one_usec);
-                       if (tsc_values[i] < avg)
-                               realdelta = -realdelta;
-
-                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
-               }
-
-               sum += delta;
-       }
-       if (!buggy)
-               printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
-       int i;
-
-       /*
-        * Not every cpu is online at the time
-        * this gets called, so we first wait for the BP to
-        * finish SMP initialization:
-        */
-       while (!atomic_read(&tsc_start_flag)) mb();
-
-       for (i = 0; i < NR_LOOPS; i++) {
-               atomic_inc(&tsc_count_start);
-               while (atomic_read(&tsc_count_start) != num_booting_cpus())
-                       mb();
-
-               rdtscll(tsc_values[smp_processor_id()]);
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               atomic_inc(&tsc_count_stop);
-               while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
-       }
-}
-#undef NR_LOOPS
-#endif
-
-extern void calibrate_delay(void);
-
-static atomic_t init_deasserted;
-
-void __init smp_callin(void)
-{
-       int cpuid, phys_id;
-       unsigned long timeout;
-
-#if 0
-       /*
-        * If waken up by an INIT in an 82489DX configuration
-        * we may get here before an INIT-deassert IPI reaches
-        * our local APIC.  We have to wait for the IPI or we'll
-        * lock up on an APIC access.
-        */
-       wait_for_init_deassert(&init_deasserted);
-#endif
-
-       /*
-        * (This works even if the APIC is not enabled.)
-        */
-       phys_id = smp_processor_id();
-       cpuid = smp_processor_id();
-       if (cpu_isset(cpuid, cpu_callin_map)) {
-               printk("huh, phys CPU#%d, CPU#%d already present??\n",
-                                       phys_id, cpuid);
-               BUG();
-       }
-       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
-       /*
-        * STARTUP IPIs are fragile beasts as they might sometimes
-        * trigger some glue motherboard logic. Complete APIC bus
-        * silence for 1 second, this overestimates the time the
-        * boot CPU is spending to send the up to 2 STARTUP IPIs
-        * by a factor of two. This should be enough.
-        */
-
-       /*
-        * Waiting 2s total for startup (udelay is not yet working)
-        */
-       timeout = jiffies + 2*HZ;
-       while (time_before(jiffies, timeout)) {
-               /*
-                * Has the boot CPU finished it's STARTUP sequence?
-                */
-               if (cpu_isset(cpuid, cpu_callout_map))
-                       break;
-               rep_nop();
-       }
-
-       if (!time_before(jiffies, timeout)) {
-               printk("BUG: CPU%d started up but did not get a callout!\n",
-                       cpuid);
-               BUG();
-       }
-
-#if 0
-       /*
-        * the boot CPU has finished the init stage and is spinning
-        * on callin_map until we finish. We are free to set up this
-        * CPU, first the APIC. (this is probably redundant on most
-        * boards)
-        */
-
-       Dprintk("CALLIN, before setup_local_APIC().\n");
-       smp_callin_clear_local_apic();
-       setup_local_APIC();
-#endif
-       map_cpu_to_logical_apicid();
-
-       local_irq_enable();
-
-       /*
-        * Get our bogomips.
-        */
-       calibrate_delay();
-       Dprintk("Stack at about %p\n",&cpuid);
-
-       /*
-        * Save our processor parameters
-        */
-       smp_store_cpu_info(cpuid);
-
-#if 0
-       disable_APIC_timer();
-#endif
-       local_irq_disable();
-       /*
-        * Allow the master to continue.
-        */
-       cpu_set(cpuid, cpu_callin_map);
-
-#if 0
-       /*
-        *      Synchronize the TSC with the BP
-        */
-       if (cpu_has_tsc && cpu_khz)
-               synchronize_tsc_ap();
-#endif
-}
-
-int cpucount;
-
-extern int cpu_idle(void);
-
-
-static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
-                                        struct pt_regs *regs)
-{
-
-       return IRQ_HANDLED;
-}
-
-static struct irqaction local_irq_debug = {
-       local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
-       NULL, NULL
-};
-
-void local_setup_debug(void)
-{
-       (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
-}
-
-
-extern void local_setup_timer(void);
-
-/*
- * Activate a secondary processor.
- */
-int __init start_secondary(void *unused)
-{
-       /*
-        * Dont put anything before smp_callin(), SMP
-        * booting is too fragile that we want to limit the
-        * things done here to the most necessary things.
-        */
-       cpu_init();
-       smp_callin();
-       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
-               rep_nop();
-       local_setup_timer();
-       local_setup_debug();    /* XXX */
-       smp_intr_init();
-       local_irq_enable();
-       /*
-        * low-memory mappings have been cleared, flush them from
-        * the local TLBs too.
-        */
-       local_flush_tlb();
-       cpu_set(smp_processor_id(), cpu_online_map);
-       wmb();
-       if (0) {
-               char *msg2 = "delay2\n";
-               int timeout;
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       udelay(1000);
-                       if (timeout == 2000) {
-                               (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
-                               timeout = 0;
-                       }
-               }
-       }
-       return cpu_idle();
-}
-
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __init initialize_secondary(void)
-{
-       /*
-        * We don't actually need to load the full TSS,
-        * basically just the stack pointer and the eip.
-        */
-
-       asm volatile(
-               "movl %0,%%esp\n\t"
-               "jmp *%1"
-               :
-               :"r" (current->thread.esp),"r" (current->thread.eip));
-}
-
-extern struct {
-       void * esp;
-       unsigned short ss;
-} stack_start;
-
-#ifdef CONFIG_NUMA
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
-                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
-
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
-       printk("Mapping cpu %d to node %d\n", cpu, node);
-       cpu_set(cpu, node_2_cpu_mask[node]);
-       cpu_2_node[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
-{
-       int node;
-
-       printk("Unmapping cpu %d from all nodes\n", cpu);
-       for (node = 0; node < MAX_NUMNODES; node ++)
-               cpu_clear(cpu, node_2_cpu_mask[node]);
-       cpu_2_node[cpu] = 0;
-}
-#else /* !CONFIG_NUMA */
-
-#define map_cpu_to_node(cpu, node)     ({})
-#define unmap_cpu_to_node(cpu) ({})
-
-#endif /* CONFIG_NUMA */
-
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-void map_cpu_to_logical_apicid(void)
-{
-       int cpu = smp_processor_id();
-       int apicid = smp_processor_id();
-
-       cpu_2_logical_apicid[cpu] = apicid;
-       map_cpu_to_node(cpu, apicid_to_node(apicid));
-}
-
-void unmap_cpu_to_logical_apicid(int cpu)
-{
-       cpu_2_logical_apicid[cpu] = BAD_APICID;
-       unmap_cpu_to_node(cpu);
-}
-
-#if APIC_DEBUG
-static inline void __inquire_remote_apic(int apicid)
-{
-       int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
-       char *names[] = { "ID", "VERSION", "SPIV" };
-       int timeout, status;
-
-       printk("Inquiring remote APIC #%d...\n", apicid);
-
-       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
-               printk("... APIC #%d %s: ", apicid, names[i]);
-
-               /*
-                * Wait for idle.
-                */
-               apic_wait_icr_idle();
-
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
-               timeout = 0;
-               do {
-                       udelay(100);
-                       status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
-               } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
-               switch (status) {
-               case APIC_ICR_RR_VALID:
-                       status = apic_read(APIC_RRR);
-                       printk("%08x\n", status);
-                       break;
-               default:
-                       printk("failed\n");
-               }
-       }
-}
-#endif
-
-#if 0
-#ifdef WAKE_SECONDARY_VIA_NMI
-/* 
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-static int __init
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
-{
-       unsigned long send_status = 0, accept_status = 0;
-       int timeout, maxlvt;
-
-       /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
-       /* Boot on the stack */
-       /* Kick the second */
-       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       /*
-        * Give the other CPU some time to accept the IPI.
-        */
-       udelay(200);
-       /*
-        * Due to the Pentium erratum 3AP.
-        */
-       maxlvt = get_maxlvt();
-       if (maxlvt > 3) {
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-       }
-       accept_status = (apic_read(APIC_ESR) & 0xEF);
-       Dprintk("NMI sent.\n");
-
-       if (send_status)
-               printk("APIC never delivered???\n");
-       if (accept_status)
-               printk("APIC delivery error (%lx).\n", accept_status);
-
-       return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
-       unsigned long send_status = 0, accept_status = 0;
-       int maxlvt, timeout, num_starts, j;
-
-       /*
-        * Be paranoid about clearing APIC errors.
-        */
-       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-
-       Dprintk("Asserting INIT.\n");
-
-       /*
-        * Turn INIT on target chip
-        */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-       /*
-        * Send IPI
-        */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-                               | APIC_DM_INIT);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       mdelay(10);
-
-       Dprintk("Deasserting INIT.\n");
-
-       /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-       /* Send IPI */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       atomic_set(&init_deasserted, 1);
-
-       /*
-        * Should we send STARTUP IPIs ?
-        *
-        * Determine this based on the APIC version.
-        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
-        */
-       if (APIC_INTEGRATED(apic_version[phys_apicid]))
-               num_starts = 2;
-       else
-               num_starts = 0;
-
-       /*
-        * Run STARTUP IPI loop.
-        */
-       Dprintk("#startup loops: %d.\n", num_starts);
-
-       maxlvt = get_maxlvt();
-
-       for (j = 1; j <= num_starts; j++) {
-               Dprintk("Sending STARTUP #%d.\n",j);
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-               Dprintk("After apic_write.\n");
-
-               /*
-                * STARTUP IPI
-                */
-
-               /* Target chip */
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-               /* Boot on the stack */
-               /* Kick the second */
-               apic_write_around(APIC_ICR, APIC_DM_STARTUP
-                                       | (start_eip >> 12));
-
-               /*
-                * Give the other CPU some time to accept the IPI.
-                */
-               udelay(300);
-
-               Dprintk("Startup point 1.\n");
-
-               Dprintk("Waiting for send to finish...\n");
-               timeout = 0;
-               do {
-                       Dprintk("+");
-                       udelay(100);
-                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-               } while (send_status && (timeout++ < 1000));
-
-               /*
-                * Give the other CPU some time to accept the IPI.
-                */
-               udelay(200);
-               /*
-                * Due to the Pentium erratum 3AP.
-                */
-               if (maxlvt > 3) {
-                       apic_read_around(APIC_SPIV);
-                       apic_write(APIC_ESR, 0);
-               }
-               accept_status = (apic_read(APIC_ESR) & 0xEF);
-               if (send_status || accept_status)
-                       break;
-       }
-       Dprintk("After Startup.\n");
-
-       if (send_status)
-               printk("APIC never delivered???\n");
-       if (accept_status)
-               printk("APIC delivery error (%lx).\n", accept_status);
-
-       return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_INIT */
-#endif
-
-extern cpumask_t cpu_initialized;
-
-static int __init do_boot_cpu(int apicid)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
- */
-{
-       struct task_struct *idle;
-       unsigned long boot_error;
-       int timeout, cpu;
-       unsigned long start_eip;
-#if 0
-       unsigned short nmi_high = 0, nmi_low = 0;
-#endif
-       full_execution_context_t ctxt;
-       extern void startup_32_smp(void);
-       extern void hypervisor_callback(void);
-       extern void failsafe_callback(void);
-       extern int smp_trap_init(trap_info_t *);
-       int i;
-
-       cpu = ++cpucount;
-       /*
-        * We can't use kernel_thread since we must avoid to
-        * reschedule the child.
-        */
-       idle = fork_idle(cpu);
-       if (IS_ERR(idle))
-               panic("failed fork for CPU %d", cpu);
-       idle->thread.eip = (unsigned long) start_secondary;
-       /* start_eip had better be page-aligned! */
-       start_eip = (unsigned long)startup_32_smp;
-
-       /* So we see what's up   */
-       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-       /* Stack for startup_32 can be just as for start_secondary onwards */
-       stack_start.esp = (void *) idle->thread.esp;
-
-       irq_ctx_init(cpu);
-
-       /*
-        * This grunge runs the startup process for
-        * the targeted processor.
-        */
-
-       atomic_set(&init_deasserted, 0);
-
-#if 1
-       if (cpu_gdt_descr[0].size > PAGE_SIZE)
-               BUG();
-       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
-       memcpy((void *)cpu_gdt_descr[cpu].address,
-              (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
-               memset((char *)cpu_gdt_descr[cpu].address +
-                      FIRST_RESERVED_GDT_ENTRY * 8, 0,
-                      NR_RESERVED_GDT_ENTRIES * 8);
-
-       memset(&ctxt, 0, sizeof(ctxt));
-
-       ctxt.cpu_ctxt.ds = __USER_DS;
-       ctxt.cpu_ctxt.es = __USER_DS;
-       ctxt.cpu_ctxt.fs = 0;
-       ctxt.cpu_ctxt.gs = 0;
-       ctxt.cpu_ctxt.ss = __KERNEL_DS;
-       ctxt.cpu_ctxt.cs = __KERNEL_CS;
-       ctxt.cpu_ctxt.eip = start_eip;
-       ctxt.cpu_ctxt.esp = idle->thread.esp;
-       ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
-
-       /* FPU is set up to default initial state. */
-       memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
-       /* Virtual IDT is empty at start-of-day. */
-       for ( i = 0; i < 256; i++ )
-       {
-               ctxt.trap_ctxt[i].vector = i;
-               ctxt.trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
-       }
-       ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
-
-       /* No LDT. */
-       ctxt.ldt_ents = 0;
-
-       {
-               unsigned long va;
-               int f;
-
-               for (va = cpu_gdt_descr[cpu].address, f = 0;
-                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
-                    va += PAGE_SIZE, f++) {
-                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
-                       make_page_readonly((void *)va);
-               }
-               ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
-               flush_page_update_queue();
-       }
-
-       /* Ring 1 stack is the initial stack. */
-       ctxt.guestos_ss  = __KERNEL_DS;
-       ctxt.guestos_esp = idle->thread.esp;
-
-       /* Callback handlers. */
-       ctxt.event_callback_cs     = __KERNEL_CS;
-       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-       ctxt.failsafe_callback_cs  = __KERNEL_CS;
-       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
-       ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
-
-       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
-
-       if (!boot_error) {
-               /*
-                * allow APs to start initializing.
-                */
-               Dprintk("Before Callout %d.\n", cpu);
-               cpu_set(cpu, cpu_callout_map);
-               Dprintk("After Callout %d.\n", cpu);
-
-               /*
-                * Wait 5s total for a response
-                */
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       if (cpu_isset(cpu, cpu_callin_map))
-                               break;  /* It has booted */
-                       udelay(100);
-               }
-
-               if (cpu_isset(cpu, cpu_callin_map)) {
-                       /* number CPUs logically, starting from 1 (BSP is 0) */
-                       Dprintk("OK.\n");
-                       printk("CPU%d: ", cpu);
-                       print_cpu_info(&cpu_data[cpu]);
-                       Dprintk("CPU has booted.\n");
-               } else {
-                       boot_error= 1;
-               }
-       }
-       x86_cpu_to_apicid[cpu] = apicid;
-       if (boot_error) {
-               /* Try to put things back the way they were before ... */
-               unmap_cpu_to_logical_apicid(cpu);
-               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
-               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-               cpucount--;
-       }
-
-#else
-       Dprintk("Setting warm reset code and vector.\n");
-
-       store_NMI_vector(&nmi_high, &nmi_low);
-
-       smpboot_setup_warm_reset_vector(start_eip);
-
-       /*
-        * Starting actual IPI sequence...
-        */
-       boot_error = wakeup_secondary_cpu(apicid, start_eip);
-
-       if (!boot_error) {
-               /*
-                * allow APs to start initializing.
-                */
-               Dprintk("Before Callout %d.\n", cpu);
-               cpu_set(cpu, cpu_callout_map);
-               Dprintk("After Callout %d.\n", cpu);
-
-               /*
-                * Wait 5s total for a response
-                */
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       if (cpu_isset(cpu, cpu_callin_map))
-                               break;  /* It has booted */
-                       udelay(100);
-               }
-
-               if (cpu_isset(cpu, cpu_callin_map)) {
-                       /* number CPUs logically, starting from 1 (BSP is 0) */
-                       Dprintk("OK.\n");
-                       printk("CPU%d: ", cpu);
-                       print_cpu_info(&cpu_data[cpu]);
-                       Dprintk("CPU has booted.\n");
-               } else {
-                       boot_error= 1;
-                       if (*((volatile unsigned char *)trampoline_base)
-                                       == 0xA5)
-                               /* trampoline started but...? */
-                               printk("Stuck ??\n");
-                       else
-                               /* trampoline code not run */
-                               printk("Not responding.\n");
-                       inquire_remote_apic(apicid);
-               }
-       }
-       x86_cpu_to_apicid[cpu] = apicid;
-       if (boot_error) {
-               /* Try to put things back the way they were before ... */
-               unmap_cpu_to_logical_apicid(cpu);
-               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
-               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-               cpucount--;
-       }
-
-       /* mark "stuck" area as not stuck */
-       *((volatile unsigned long *)trampoline_base) = 0;
-#endif
-
-       return boot_error;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-/*
- * Derive cacheflush_time and cache_decay_ticks from the CPU frequency
- * (cpu_khz) and the boot CPU's cache size.
- *
- * cacheflush_time is a rough estimate of the number of cycles it takes
- * a fully memory-limited process to flush the SMP-local cache.  (For a
- * P5 this pretty much means another idle CPU is almost always chosen
- * at wakeup time because of the small L1 cache; on PIIs it is around
- * 50-100 usecs, depending on the cache size.)
- */
-static void smp_tune_scheduling (void)
-{
-       unsigned long bandwidth = 350; /* MB/s */
-       unsigned long cachesize;       /* kB   */
-
-       if (!cpu_khz) {
-               /*
-                * this basically disables processor-affinity
-                * scheduling on SMP without a TSC.
-                */
-               cacheflush_time = 0;
-               return;
-       }
-
-       cachesize = boot_cpu_data.x86_cache_size;
-       if (cachesize == -1) {
-               /* x86_cache_size of -1: assume Pentiums, 2x8kB cache */
-               cachesize = 16;
-               bandwidth = 100;
-       }
-
-       cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
-       cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
-
-       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
-               (long)cacheflush_time/(cpu_khz/1000),
-               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
-       printk("task migration cache decay timeout: %ld msecs.\n",
-               cache_decay_ticks);
-}
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-#if 0
-static int boot_cpu_logical_apicid;
-#endif
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-
-/*
- * Boot-time SMP bring-up.
- *
- * Records and prints the boot CPU's information, tunes the scheduler,
- * and then - unless no SMP configuration was found or max_cpus is 0 -
- * installs the IPI handlers and calls do_boot_cpu() for each virtual
- * CPU index from 1 up to HYPERVISOR_shared_info->n_vcpu - 1.  Finally
- * it prints the BogoMIPS total and rebuilds cpu_sibling_map[].
- *
- * max_cpus: upper bound on the number of CPUs to activate.  A value of
- *           0 forces n_vcpu to 1 and returns before any secondary CPU
- *           is started.
- *
- * The native APIC paths are kept for reference inside "#if 0" blocks
- * and are not compiled.
- */
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
-       int cpu, kicked;
-       unsigned long bogosum = 0;
-#if 0
-       int apicid, bit;
-#endif
-
-       /*
-        * Setup boot CPU information
-        */
-       smp_store_cpu_info(0); /* Final full version of the data */
-       printk("CPU%d: ", 0);
-       print_cpu_info(&cpu_data[0]);
-
-#if 0
-       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-       boot_cpu_logical_apicid = logical_smp_processor_id();
-       x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
-#else
-       // boot_cpu_physical_apicid = 0;
-       // boot_cpu_logical_apicid = 0;
-       x86_cpu_to_apicid[0] = 0;
-#endif
-
-       current_thread_info()->cpu = 0;
-       smp_tune_scheduling();
-       /* Until the full map is built below, CPU 0 is its own only sibling. */
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
-
-       /*
-        * If we couldn't find an SMP configuration at boot time,
-        * get out of here now!
-        */
-       if (!smp_found_config /* && !acpi_lapic) */) {
-               printk(KERN_NOTICE "SMP motherboard not detected.\n");
-               smpboot_clear_io_apic_irqs();
-#if 0
-               phys_cpu_present_map = physid_mask_of_physid(0);
-               if (APIC_init_uniprocessor())
-                       printk(KERN_NOTICE "Local APIC not detected."
-                                          " Using dummy APIC emulation.\n");
-#endif
-               map_cpu_to_logical_apicid();
-               return;
-       }
-
-#if 0
-       /*
-        * Should not be necessary because the MP table should list the boot
-        * CPU too, but we do it for the sake of robustness anyway.
-        * Makes no sense to do this check in clustered apic mode, so skip it
-        */
-       if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
-               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
-                               boot_cpu_physical_apicid);
-               physid_set(hard_smp_processor_id(), phys_cpu_present_map);
-       }
-
-       /*
-        * If we couldn't find a local APIC, then get out of here now!
-        */
-       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-                       boot_cpu_physical_apicid);
-               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
-               smpboot_clear_io_apic_irqs();
-               phys_cpu_present_map = physid_mask_of_physid(0);
-               return;
-       }
-
-       verify_local_APIC();
-#endif
-
-       /*
-        * If SMP should be disabled, then really disable it!
-        */
-       if (!max_cpus) {
-               HYPERVISOR_shared_info->n_vcpu = 1;
-               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
-               smpboot_clear_io_apic_irqs();
-#if 0
-               phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
-               return;
-       }
-
-       /* Bind and install the IPI handlers for the boot CPU. */
-       smp_intr_init();
-
-#if 0
-       connect_bsp_APIC();
-       setup_local_APIC();
-#endif
-       map_cpu_to_logical_apicid();
-#if 0
-
-
-       setup_portio_remap();
-
-       /*
-        * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
-        *
-        * In clustered apic mode, phys_cpu_present_map is a constructed thus:
-        * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
-        * clustered apic ID.
-        */
-       Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-#endif
-       Dprintk("CPU present map: %lx\n",
-               (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
-
-       /*
-        * Start the secondary CPUs.  "kicked" begins at 1 and is bumped for
-        * every do_boot_cpu() that reports success; the loop ends once it
-        * reaches NR_CPUS or all n_vcpu indices have been visited.  Indices
-        * are skipped (not started) once cpucount+1 has reached max_cpus.
-        */
-       kicked = 1;
-       for (cpu = 1; kicked < NR_CPUS &&
-                    cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
-               if (max_cpus <= cpucount+1)
-                       continue;
-
-               if (do_boot_cpu(cpu))
-                       printk("CPU #%d not responding - cannot use it.\n",
-                                                               cpu);
-               else
-                       ++kicked;
-       }
-
-#if 0
-       /*
-        * Cleanup possible dangling ends...
-        */
-       smpboot_restore_warm_reset_vector();
-#endif
-
-       /*
-        * Allow the user to impress friends.
-        */
-       Dprintk("Before bogomips.\n");
-       /* Sum loops_per_jiffy over every CPU present in cpu_callout_map. */
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
-               if (cpu_isset(cpu, cpu_callout_map))
-                       bogosum += cpu_data[cpu].loops_per_jiffy;
-       printk(KERN_INFO
-               "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-               cpucount+1,
-               bogosum/(500000/HZ),
-               (bogosum/(5000/HZ))%100);
-       
-       Dprintk("Before bogocount - setting activated=1.\n");
-
-       if (smp_b_stepping)
-               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
-
-       /*
-        * Don't taint if we are running SMP kernel on a single non-MP
-        * approved Athlon
-        */
-       if (tainted & TAINT_UNSAFE_SMP) {
-               if (cpucount)
-                       printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
-               else
-                       tainted &= ~TAINT_UNSAFE_SMP;
-       }
-
-       Dprintk("Boot done.\n");
-
-       /*
-        * construct cpu_sibling_map[], so that we can tell sibling CPUs
-        * efficiently.
-        */
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
-               cpus_clear(cpu_sibling_map[cpu]);
-
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               int siblings = 0;
-               int i;
-               if (!cpu_isset(cpu, cpu_callout_map))
-                       continue;
-
-               if (smp_num_siblings > 1) {
-                       /* Siblings are the called-out CPUs sharing phys_proc_id. */
-                       for (i = 0; i < NR_CPUS; i++) {
-                               if (!cpu_isset(i, cpu_callout_map))
-                                       continue;
-                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                                       siblings++;
-                                       cpu_set(i, cpu_sibling_map[cpu]);
-                               }
-                       }
-               } else {
-                       /* No sibling threads: each CPU is its own sibling set. */
-                       siblings++;
-                       cpu_set(cpu, cpu_sibling_map[cpu]);
-               }
-
-               if (siblings != smp_num_siblings)
-                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
-       }
-
-#if 0
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               check_nmi_watchdog();
-
-       smpboot_setup_io_apic();
-
-       setup_boot_APIC_clock();
-
-       /*
-        * Synchronize the TSC with the AP
-        */
-       if (cpu_has_tsc && cpucount && cpu_khz)
-               synchronize_tsc_bp();
-#endif
-}
-
-/* These are wrappers to interface to the new boot process.  Someone
-   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-       /* All of the work is delegated to smp_boot_cpus(). */
-       smp_boot_cpus(max_cpus);
-}
-
-/* Mark the CPU this runs on in both cpu_online_map and cpu_callout_map. */
-void __devinit smp_prepare_boot_cpu(void)
-{
-       int this_cpu = smp_processor_id();
-
-       cpu_set(this_cpu, cpu_online_map);
-       cpu_set(this_cpu, cpu_callout_map);
-}
-
-/*
- * Release secondary CPU @cpu so that it can go online.
- *
- * Returns -ENOSYS if @cpu is already in smp_commenced_mask (this only
- * works at boot for x86; see the "rewrite" remark above), -EIO if @cpu
- * never appeared in cpu_callin_map, and 0 once @cpu has shown up in
- * cpu_online_map.  Local interrupts are enabled on every path.
- */
-int __devinit __cpu_up(unsigned int cpu)
-{
-       int err = 0;
-
-       if (cpu_isset(cpu, smp_commenced_mask)) {
-               /* Already commenced. */
-               err = -ENOSYS;
-       } else if (!cpu_isset(cpu, cpu_callin_map)) {
-               /* In case one didn't come up */
-               err = -EIO;
-       }
-
-       local_irq_enable();
-       if (err)
-               return err;
-
-       /* Unleash the CPU! */
-       cpu_set(cpu, smp_commenced_mask);
-       while (!cpu_isset(cpu, cpu_online_map))
-               mb();
-       return 0;
-}
-
-/*
- * Called with max_cpus once CPU bring-up is finished.  In this build the
- * "#if 1" arm is empty, so the function does nothing; the cleanup in
- * the "#else" arm is never compiled.
- */
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-#if 1
-#else
-#ifdef CONFIG_X86_IO_APIC
-       setup_ioapic_dest();
-#endif
-       zap_low_mappings();
-       /*
-        * Disable executability of the SMP trampoline:
-        */
-       set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
-}
-
-/*
- * irqaction tables for the three IPIs that smp_intr_init() installs.
- * Each handler is defined in another file and only declared here.
- */
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
-
-/* Installed by smp_intr_init() for RESCHEDULE_VECTOR. */
-static struct irqaction reschedule_irq = {
-       smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
-       NULL, NULL
-};
-
-extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
-
-/* Installed by smp_intr_init() for INVALIDATE_TLB_VECTOR. */
-static struct irqaction invalidate_irq = {
-       smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
-       NULL, NULL
-};
-
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-
-/* Installed by smp_intr_init() for CALL_FUNCTION_VECTOR. */
-static struct irqaction call_function_irq = {
-       smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
-       "call_function", NULL, NULL
-};
-
-/*
- * Bind the reschedule, TLB-invalidate and call-function IPIs of the
- * calling CPU to IRQs and install the matching irqaction on each.
- * The result of setup_irq() is discarded.
- */
-void __init smp_intr_init(void)
-{
-       int irq;
-
-       irq = bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR);
-       (void)setup_irq(irq, &reschedule_irq);
-
-       irq = bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR);
-       (void)setup_irq(irq, &invalidate_irq);
-
-       irq = bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR);
-       (void)setup_irq(irq, &call_function_irq);
-}
diff --git a/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
deleted file mode 100644 (file)
index 51addc6..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright (C) 2004, Christian Limpach */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/threads.h>
-
-unsigned int __initdata maxcpus = NR_CPUS;
-
-
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
-       /*
-        * Stub: the multiplier is ignored.  The call is only logged and
-        * 0 is always returned.
-        */
-       printk("setup_profiling_timer\n");
-
-       return 0;
-}
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile
deleted file mode 100644 (file)
index 80b7ca0..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-
-obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
-
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c
deleted file mode 100644 (file)
index 5e7d47c..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/******************************************************************************
- * blktap.c
- * 
- * XenLinux virtual block-device tap.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- * Based on the original split block driver:
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- * 
- * Note that unlike the split block driver code, this driver has been developed
- * strictly for Linux 2.6
- */
-
-#include "blktap.h"
-
-/*
- * Driver initialisation.  Registers blkif_ctrlif_rx() as the receiver
- * for both frontend (CMSG_BLKIF_FE) and backend (CMSG_BLKIF_BE) control
- * messages, sends a driver-UP notification for each role, and then
- * brings up the userland channel via blktap_init().  Always returns 0.
- */
-int __init xlblk_init(void)
-{
-    blkif_fe_driver_status_t fe_up;
-    blkif_be_driver_status_t be_up;
-    ctrl_msg_t               msg;
-
-    printk(KERN_INFO "Initialising Xen block tap device\n");
-
-    DPRINTK("   tap - Backend connection init:\n");
-
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Tell the domain controller that the frontend-role driver is up. */
-    fe_up.status = BLKIF_DRIVER_STATUS_UP;
-    msg.type     = CMSG_BLKIF_FE;
-    msg.subtype  = CMSG_BLKIF_FE_DRIVER_STATUS;
-    msg.length   = sizeof(fe_up);
-    memcpy(msg.msg, &fe_up, sizeof(fe_up));
-    ctrl_if_send_message_block(&msg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Frontend connection init:\n");
-
-    active_reqs_init();
-
-    ptfe_blkif.status = DISCONNECTED;
-
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Tell the domain controller that the backend-role driver is up. */
-    be_up.status = BLKIF_DRIVER_STATUS_UP;
-    msg.type     = CMSG_BLKIF_BE;
-    msg.subtype  = CMSG_BLKIF_BE_DRIVER_STATUS;
-    msg.length   = sizeof(be_up);
-    memcpy(msg.msg, &be_up, sizeof(be_up));
-    ctrl_if_send_message_block(&msg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Userland channel init:\n");
-
-    blktap_init();
-
-    DPRINTK("Blkif tap device initialized.\n");
-
-    return 0;
-}
-
-/* Suspend hook: the tap driver does nothing here. */
-void blkdev_suspend(void)
-{
-}
-
-/*
- * Resume hook: re-send the frontend driver-UP notification to the
- * domain controller.
- */
-void blkdev_resume(void)
-{
-    blkif_fe_driver_status_t up;
-    ctrl_msg_t               msg;
-
-    up.status   = BLKIF_DRIVER_STATUS_UP;
-
-    msg.type    = CMSG_BLKIF_FE;
-    msg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
-    msg.length  = sizeof(up);
-    memcpy(msg.msg, &up, sizeof(up));
-
-    ctrl_if_send_message_block(&msg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-
-__initcall(xlblk_init);
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h
deleted file mode 100644 (file)
index 7e5d73d..0000000
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * blktap.h
- * 
- * Interfaces for the Xen block tap driver.
- * 
- * (c) 2004, Andrew Warfield, University of Cambridge
- * 
- */
-
-#ifndef __BLKTAP_H__
-#define __BLKTAP_H__
-
-#include <linux/version.h>
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <asm-xen/ctrl_if.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/hypervisor.h>
-#include <asm-xen/xen-public/io/blkif.h>
-
-/* -------[ debug / pretty printing ]--------------------------------- */
-
-#if 0
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-
-/* -------[ connection / request tracking ]--------------------------- */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-extern spinlock_t blkif_io_lock;
-
-/*
- * State for one block interface.  The tap keeps a single instance,
- * ptfe_blkif, for the ring shared with the frontend domain.
- */
-typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t          domid;
-    unsigned int     handle;
-    /* Physical parameters of the comms window. */
-    unsigned long    shmem_frame;
-    unsigned int     evtchn;
-    int              irq;
-    /* Comms information. */
-    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
-    BLKIF_RING_IDX     blk_req_cons;  /* Request consumer. */
-    BLKIF_RING_IDX     blk_resp_prod; /* Private version of resp. producer. */
-    
-    /* Connection state of this interface. */
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */    u8               disconnect_rspid;
-    /* NOTE(review): presumably a hash-chain link; no hash is visible here. */
-    struct blkif_st *hash_next;
-    struct list_head blkdev_list;
-    spinlock_t       blk_ring_lock;
-    atomic_t         refcnt;
-    
-    struct work_struct work;
-} blkif_t;
-
-/*
- * Bookkeeping record for one request tracked by the tap (the element
- * type of the active_reqs[] table).
- * NOTE(review): the per-field meanings are not visible in this header;
- * mach_fas/virt_fas presumably hold one address per request segment -
- * confirm against the datapath code.
- */
-typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    int            nr_pages;
-    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int            next_free;
-} active_req_t;
-
-
-/* -------[ block ring structs ]-------------------------------------- */
-
-/* Types of ring. */
-#define BLKIF_REQ_RING_TYPE 1
-#define BLKIF_RSP_RING_TYPE 2
-
-/* generic ring struct. */
-typedef struct blkif_generic_ring_struct {
-    /*
-     * BLKIF_REQ_RING_TYPE or BLKIF_RSP_RING_TYPE.  The request and
-     * response ring structs also start with this member, so code holding
-     * a generic pointer can inspect it before casting to the real type.
-     */
-    int type;
-} blkif_generic_ring_t;
-
-/* A requestor's view of a ring. */
-typedef struct blkif_req_ring_struct {
-
-    /* "type" must stay first so the struct can be viewed as a generic ring. */
-    int type;                    /* Will be BLKIF_REQ_RING_TYPE        */
-    BLKIF_RING_IDX req_prod;     /* PRIVATE req_prod index             */
-    BLKIF_RING_IDX rsp_cons;     /* Response consumer index            */
-    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
-
-} blkif_req_ring_t;
-
-#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
-
-/* A responder's view of a ring. */
-typedef struct blkif_rsp_ring_struct {
-
-    /* Presumably BLKIF_RSP_RING_TYPE, mirroring the request ring - confirm. */
-    int type;       
-    BLKIF_RING_IDX rsp_prod;     /* PRIVATE rsp_prod index             */
-    BLKIF_RING_IDX req_cons;     /* Request consumer index             */
-    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
-
-} blkif_rsp_ring_t;
-
-/*
- * Static initialiser for a blkif_rsp_ring_t, used as
- *     blkif_rsp_ring_t r = BLKIF_RSP_RING_INIT;
- * The expansion must not contain its own '=', matching
- * BLKIF_REQ_RING_INIT.
- */
-#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
-
-/*
- * View any ring struct as a generic ring.  The whole expansion is
- * parenthesised so that RING(p)->type casts p rather than p->type.
- */
-#define RING(a) ((blkif_generic_ring_t *)(a))
-
-inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
-
-
-/* -------[ interposition -> character device interface ]------------- */
-
-/* /dev/xen/blktap resides at device number major=10, minor=202        */
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-/*
- * Return 1 if arg is an acceptable value for BLKTAP_IOCTL_SETMODE and
- * 0 otherwise.  Accepted values are the pass-through mode, either
- * intercept mode alone, both intercept modes together, or one of the
- * copy modes (FE, BE or both) optionally combined with the matching
- * *_PAGES flag(s).
- */
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    switch ( arg )
-    {
-    case BLKTAP_MODE_PASSTHROUGH:
-    case BLKTAP_MODE_INTERCEPT_FE:
-    case BLKTAP_MODE_INTERCEPT_BE:
-    case BLKTAP_MODE_INTERPOSE:
-        return 1;
-    default:
-        break;
-    }
-
-    /* Copy modes: mask off the optional page-copy flag before comparing. */
-    if ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE )
-        return 1;
-    if ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE )
-        return 1;
-
-    return ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH );
-}
-
-
-
-/* -------[ Mappings to User VMA ]------------------------------------ */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-extern struct vm_area_struct *blktap_vma;
-
-/* The following are from blkback.c and should probably be put in a
- * header and included from there.
- * The mmap area described here is where attached data pages will be mapped.
- */
-extern unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
-    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES             \
-    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * PAGE_SIZE))
-
-/* immediately before the mmap area, we have a bunch of pages reserved
- * for shared memory rings.
- */
-
-#define RING_PAGES 128 
-extern unsigned long rings_vstart;
-
-/* -------[ Here be globals ]----------------------------------------- */
-
-extern unsigned long blktap_mode;
-
-
-/* blkif struct, containing ring to FE domain */
-extern blkif_t ptfe_blkif; 
-
-/* Connection to a single backend domain. */
-extern blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
-extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
-extern BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
-
-/* Rings up to user space. */ 
-extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
-extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
-
-/* Event channel to backend domain. */
-extern unsigned int blkif_ptbe_evtchn;
-
-/* User ring status... this will soon vanish into a ring struct. */
-extern unsigned long blktap_ring_ok;
-
-/* -------[ ...and function prototypes. ]----------------------------- */
-
-/* init function for character device interface.                       */
-int blktap_init(void);
-
-/* interfaces to the char driver, passing messages to and from apps.   */
-void blktap_kick_user(void);
-int blktap_write_to_ring(blkif_request_t *req);
-
-
-/* user ring access functions: */
-int blktap_write_fe_ring(blkif_request_t *req);
-int blktap_write_be_ring(blkif_response_t *rsp);
-int blktap_read_fe_ring(void);
-int blktap_read_be_ring(void);
-
-/* and the helpers they call: */
-inline int write_resp_to_fe_ring(blkif_response_t *rsp);
-inline void kick_fe_domain(void);
-
-inline int write_req_to_be_ring(blkif_request_t *req);
-inline void kick_be_domain(void);
-
-/* Interrupt handlers. */
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
-                                  struct pt_regs *ptregs);
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
-
-/* Control message receiver. */
-extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-
-#define __BLKINT_H__
-#endif
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
deleted file mode 100644 (file)
index a3d485a..0000000
+++ /dev/null
@@ -1,358 +0,0 @@
-/******************************************************************************
- * blktap_controlmsg.c
- * 
- * XenLinux virtual block-device tap.
- * Control interfaces to the frontend and backend drivers.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-#include "blktap.h"
-
-#define BLKIF_STATE_CLOSED       0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED    2
-
-static char *blkif_state_name[] = {
-    [BLKIF_STATE_CLOSED]       = "closed",
-    [BLKIF_STATE_DISCONNECTED] = "disconnected",
-    [BLKIF_STATE_CONNECTED]    = "connected",
-};
-
-static char * blkif_status_name[] = {
-    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
-    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
-    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
-    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
-};
-static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED;
-static unsigned blkif_ptbe_irq;
-unsigned int blkif_ptbe_evtchn;
-
-/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
-
-
-/*
- * BE_CREATE handler: reset the single frontend-facing interface
- * (ptfe_blkif) and record the domain id and handle carried in the
- * message.  Always reports BLKIF_BE_STATUS_OKAY.
- */
-void blkif_ptfe_create(blkif_be_create_t *create)
-{
-    /* for convenience if the hash is readded later. */
-    blkif_t *blkif = &ptfe_blkif;
-
-    /* May want to store info on the connecting domain here. */
-
-    DPRINTK("PT got BE_CREATE\n");
-
-    /* blkif struct init code from blkback.c */
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = create->domid;
-    blkif->handle = create->blkif_handle;
-    blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 0);
-
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-/*
- * BE_DESTROY handler.  Nothing is torn down at present; the request is
- * simply acknowledged with BLKIF_BE_STATUS_OKAY.
- */
-void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
-{
-    /* Clear anything that we initialized above. */
-
-    DPRINTK("PT got BE_DESTROY\n");
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-/*
- * BE_CONNECT handler: map the frontend's shared ring frame into a
- * freshly reserved kernel VM area, bind the event channel to an IRQ and
- * mark ptfe_blkif CONNECTED.
- *
- * The outcome is reported through connect->status:
- *   BLKIF_BE_STATUS_OKAY                 on success;
- *   BLKIF_BE_STATUS_OUT_OF_MEMORY        if no VM area is available or the
- *                                        remap returns -ENOMEM;
- *   BLKIF_BE_STATUS_MAPPING_ERROR        if the remap returns -EFAULT;
- *   BLKIF_BE_STATUS_ERROR                for any other remap failure;
- *   BLKIF_BE_STATUS_INTERFACE_CONNECTED  if the interface is not currently
- *                                        DISCONNECTED.
- */
-void blkif_ptfe_connect(blkif_be_connect_t *connect)
-{
-    domid_t       domid  = connect->domid;
-    /*unsigned int  handle = connect->blkif_handle;*/
-    unsigned int  evtchn = connect->evtchn;
-    unsigned long shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-    pgprot_t      prot;
-    int           error;
-    blkif_t      *blkif;
-
-    DPRINTK("PT got BE_CONNECT\n");
-
-    blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
-
-    /* Reserve one page of kernel virtual address space for the ring. */
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    /* Map the frame named by the message, owned by domid, at that address. */
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        WPRINTK("BE_CONNECT: error! (%d)\n", error);
-        if ( error == -ENOMEM ) 
-            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT ) {
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-            WPRINTK("BE_CONNECT: MAPPING error!\n");
-        }
-        else
-            connect->status = BLKIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    /*
-     * NOTE(review): the already-connected check only happens after the
-     * frame has been mapped, so a duplicate CONNECT maps and then unmaps.
-     */
-    if ( blkif->status != DISCONNECTED )
-    {
-        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-
-    blkif->evtchn        = evtchn;
-    blkif->irq           = bind_evtchn_to_irq(evtchn);
-    blkif->shmem_frame   = shmem_frame;
-    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
-    blkif->status        = CONNECTED;
-    /*blkif_get(blkif);*/
-
-    /*
-     * NOTE(review): the request_irq() result is ignored; if it fails the
-     * interface is still reported as connected.
-     */
-    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
-
-    connect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-/*
- * BE_DISCONNECT handler.
- *
- * The passthrough is deliberately not set to disconnected: it only acts
- * as a pipe and defers to the real ends to handle things like recovery.
- * The request is just acknowledged with BLKIF_BE_STATUS_OKAY.
- */
-void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
-{
-    DPRINTK("PT got BE_DISCONNECT\n");
-
-    disconnect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-/*-----[ Control Messages to/from Backend VM ]----------------------------*/
-
-/* Tell the controller to bring up the interface. */
-/*
- * Send an FE_INTERFACE_CONNECT message for handle 0 that carries the
- * machine frame number of the ring page blk_ptbe_ring.
- */
-static void blkif_ptbe_send_interface_connect(void)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
-        .length  = sizeof(blkif_fe_interface_connect_t),
-    };
-    /* The payload is built in place inside the message buffer. */
-    blkif_fe_interface_connect_t *payload = (void*)cmsg.msg;
-
-    payload->handle      = 0;
-    payload->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
-
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Close hook for the backend-facing connection: currently a no-op. */
-static void blkif_ptbe_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_ptbe_disconnect(void)
-{
-    /*
-     * Allocate the page that backs the ring shared with the backend.
-     * The allocation can fail, so check it before touching the ring; on
-     * failure leave the state unchanged and do not ask the controller to
-     * connect an interface whose ring does not exist.
-     */
-    blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
-    if ( blk_ptbe_ring == NULL )
-    {
-        WPRINTK("out of memory allocating the backend ring page\n");
-        return;
-    }
-
-    /* Start with an empty ring: every producer/consumer index at zero. */
-    blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod
-                            = ptbe_resp_cons = ptbe_req_prod = 0;
-    blkif_pt_state  = BLKIF_STATE_DISCONNECTED;
-    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
-    blkif_ptbe_send_interface_connect();
-}
-
-/*
- * Bind the event channel announced in the status message to an IRQ and
- * install blkif_ptbe_int() on it.  On success the passthrough state
- * becomes BLKIF_STATE_CONNECTED; on failure a warning is logged and the
- * state is left unchanged.
- */
-static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
-{
-    int rc;
-
-    blkif_ptbe_evtchn = status->evtchn;
-    blkif_ptbe_irq    = bind_evtchn_to_irq(blkif_ptbe_evtchn);
-
-    rc = request_irq(blkif_ptbe_irq, blkif_ptbe_int,
-                     SA_SAMPLE_RANDOM, "blkif", NULL);
-    if ( rc != 0 )
-    {
-        WPRINTK("blkfront request_irq failed (%d)\n", rc);
-        return;
-    }
-
-    /*
-     * Transition to connected in case we need to do a partition probe
-     * on a whole disk.
-     */
-    blkif_pt_state = BLKIF_STATE_CONNECTED;
-}
-
-/*
- * Log a warning naming the interface status that was received and the
- * passthrough state it arrived in.
- */
-static void unexpected(blkif_fe_interface_status_t *status)
-{
-    char *got_status = blkif_status_name[status->status];
-    char *in_state   = blkif_state_name[blkif_pt_state];
-
-    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n",
-           got_status, in_state);
-}
-
-/*
- * Handle an interface-status message for the backend-facing connection.
- *
- * Only handle 0 is supported; anything else is ignored.  The action is
- * chosen from the reported status combined with the current
- * blkif_pt_state.  Combinations that are not part of the normal
- * CLOSED -> DISCONNECTED -> CONNECTED progression are logged through
- * unexpected().
- */
-static void blkif_ptbe_status(
-    blkif_fe_interface_status_t *status)
-{
-    if ( status->handle != 0 )
-    {
-        DPRINTK("Status change on unsupported blkif %d\n",
-               status->handle);
-        return;
-    }
-
-    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
-    
-    switch ( status->status )
-    {
-    case BLKIF_INTERFACE_STATUS_CLOSED:
-        /* Never expected; when not already closed, also run the close hook. */
-        switch ( blkif_pt_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_ptbe_close();
-            break;
-        }
-        break;
-        
-    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
-        /* Normal only from CLOSED: allocate the ring and request a connect. */
-        switch ( blkif_pt_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            blkif_ptbe_disconnect();
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
-            unexpected(status);
-            break;
-        }
-        break;
-        
-    case BLKIF_INTERFACE_STATUS_CONNECTED:
-        /* Normal only from DISCONNECTED; other states still try to connect. */
-        switch ( blkif_pt_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            blkif_ptbe_disconnect();
-            blkif_ptbe_connect(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-            blkif_ptbe_connect(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_ptbe_connect(status);
-            break;
-        }
-        break;
-
-   case BLKIF_INTERFACE_STATUS_CHANGED:
-        /* The tap takes no action on CHANGED; every state just logs it. */
-        switch ( blkif_pt_state )
-        {
-        case BLKIF_STATE_CLOSED:
-        case BLKIF_STATE_DISCONNECTED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            /* vbd_update(); */
-            /* tap doesn't really get state changes... */
-            unexpected(status);
-            break;
-        }
-       break;
-       
-    default:
-        DPRINTK("Status change to unknown value %d\n", status->status);
-        break;
-    }
-}
-
-/*-----[ All control messages enter here: ]-------------------------------*/
-
-/*
- * Control-interface receiver for both roles of the tap.
- *
- * Dispatches on msg->type (CMSG_BLKIF_FE for messages about the
- * backend-facing connection, CMSG_BLKIF_BE for messages from frontends)
- * and then on msg->subtype.  Each handler is called only if msg->length
- * matches the size of the structure it expects.  A response is always
- * sent: the (possibly updated) message on success, or the message with
- * length 0 when the subtype is unknown or the length is wrong.
- *
- * msg: the received control message; handlers write their status into
- *      its payload in place.
- * id:  receiver id passed by the control interface (unused).
- */
-void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->type )
-    {
-    case CMSG_BLKIF_FE:
-
-        switch ( msg->subtype )
-        {
-        case CMSG_BLKIF_FE_INTERFACE_STATUS:
-            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
-                goto parse_error;
-            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
-            break;
-
-        default:
-            goto parse_error;
-        }
-        /*
-         * Leave the outer switch here.  Without this break a handled FE
-         * message would fall through and be parsed again as a BE message.
-         */
-        break;
-
-    case CMSG_BLKIF_BE:
-
-        switch ( msg->subtype )
-        {
-        case CMSG_BLKIF_BE_CREATE:
-            if ( msg->length != sizeof(blkif_be_create_t) )
-                goto parse_error;
-            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
-            break;
-        case CMSG_BLKIF_BE_DESTROY:
-            if ( msg->length != sizeof(blkif_be_destroy_t) )
-                goto parse_error;
-            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
-            break;
-        case CMSG_BLKIF_BE_CONNECT:
-            if ( msg->length != sizeof(blkif_be_connect_t) )
-                goto parse_error;
-            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
-            break;
-        case CMSG_BLKIF_BE_DISCONNECT:
-            if ( msg->length != sizeof(blkif_be_disconnect_t) )
-                goto parse_error;
-            blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
-            break;
-
-        /* We just ignore anything to do with vbds for now. */
-
-        case CMSG_BLKIF_BE_VBD_CREATE:
-            DPRINTK("PT got VBD_CREATE\n");
-            ((blkif_be_vbd_create_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_DESTROY:
-            DPRINTK("PT got VBD_DESTROY\n");
-            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_GROW:
-            DPRINTK("PT got VBD_GROW\n");
-            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_SHRINK:
-            DPRINTK("PT got VBD_SHRINK\n");
-            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        default:
-            goto parse_error;
-        }
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-    return;
-
- parse_error:
-    msg->length = 0;
-    ctrl_if_send_response(msg);
-}
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c
deleted file mode 100644 (file)
index c8733dc..0000000
+++ /dev/null
@@ -1,517 +0,0 @@
-/******************************************************************************
- * blktap_datapath.c
- * 
- * XenLinux virtual block-device tap.
- * Block request routing data path.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-#include "blktap.h"
-
-/*-----[ The data paths ]-------------------------------------------------*/
-/* Connections to the frontend domains.*/
-blkif_t   ptfe_blkif; 
-/* Connection to a single backend domain. */
-blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
-BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
-BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
-
-/* Rings up to user space. */ 
-blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
-blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
-
-/*-----[ Ring helpers ]---------------------------------------------------*/
-
-inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
-{
-    if (ring->type == BLKIF_REQ_RING_TYPE) {
-        blkif_req_ring_t *r = (blkif_req_ring_t *)ring;
-        return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE );
-    }
-    
-    /* for now assume that there is always room in the response path. */
-    return 0;
-}
-
-/*-----[ Tracking active requests ]---------------------------------------*/
-
-/* this must be the same as MAX_PENDING_REQS in blkback.c */
-#define MAX_ACTIVE_REQS 64
-
-active_req_t  active_reqs[MAX_ACTIVE_REQS];
-unsigned char active_req_ring[MAX_ACTIVE_REQS];
-spinlock_t    active_req_lock = SPIN_LOCK_UNLOCKED;
-typedef unsigned int ACTIVE_RING_IDX;
-ACTIVE_RING_IDX active_prod, active_cons;
-#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
-#define ACTIVE_IDX(_ar) (_ar - active_reqs)
-
-inline active_req_t *get_active_req(void) 
-{
-    ASSERT(active_cons != active_prod);    
-    return &active_reqs[MASK_ACTIVE_IDX(active_cons++)];
-}
-
-inline void free_active_req(active_req_t *ar) 
-{
-    unsigned long flags;
-        
-    spin_lock_irqsave(&active_req_lock, flags);
-    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
-    spin_unlock_irqrestore(&active_req_lock, flags);
-}
-
-inline void active_reqs_init(void)
-{
-    ACTIVE_RING_IDX i;
-    
-    active_cons = 0;
-    active_prod = MAX_ACTIVE_REQS;
-    memset(active_reqs, 0, sizeof(active_reqs));
-    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
-        active_req_ring[i] = i;
-}
-
-/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
-
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
-{
-    /* we have pending messages from the real frontend. */
-
-    blkif_request_t *req_s, *req_d;
-    BLKIF_RING_IDX fe_rp;
-    unsigned long flags;
-    int notify;
-    unsigned long i;
-    active_req_t *ar;
-    
-    DPRINTK("PT got FE interrupt.\n");
-    
-    /* lock both rings */
-    spin_lock_irqsave(&blkif_io_lock, flags);
-
-    /* While there are REQUESTS on FERing: */
-    fe_rp = ptfe_blkif.blk_ring_base->req_prod;
-    rmb();
-    notify = (ptfe_blkif.blk_req_cons != fe_rp);
-
-    for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
-
-        /* Get the next request */
-        req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
-        
-        /* This is a new request:  
-         * Assign an active request record, and remap the id. 
-         */
-        ar = get_active_req();
-        ar->id = req_s->id;
-        req_s->id = ACTIVE_IDX(ar);
-        DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
-
-        /* FE -> BE interposition point is here. */
-        
-        /* ------------------------------------------------------------- */
-        /* BLKIF_OP_PROBE_HACK:                                          */
-        /* Until we have grant tables, we need to allow the backent to   */
-        /* map pages that are either from this domain, or more commonly  */
-        /* from the real front end.  We achieve this in a terrible way,  */
-        /* by passing the front end's domid allong with PROBE messages   */
-        /* Once grant tables appear, this should all go away.            */
-
-        if (req_s->operation == BLKIF_OP_PROBE) {
-            DPRINTK("Adding FE domid to PROBE request.\n");
-            (domid_t)(req_s->frame_and_sects[1]) = ptfe_blkif.domid;
-        }
-
-        /* ------------------------------------------------------------- */
-
-        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-            
-            /* Copy the response message to UFERing */
-            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
-            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
-
-            /* XXX: mapping/copying of attached pages is still not done! */
-
-            DPRINTK("req->UFERing\n"); 
-            blktap_write_fe_ring(req_s);
-
-
-        }
-
-        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
-            
-            /* be included to prevent noise from the fe when its off */
-            /* copy the request message to the BERing */
-
-            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
-                    (unsigned)MASK_BLKIF_IDX(i), 
-                    (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
-
-            req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
-            
-            memcpy(req_d, req_s, sizeof(blkif_request_t));
-
-            ptbe_req_prod++;
-        }
-    }
-
-    ptfe_blkif.blk_req_cons = i;
-
-    /* If we have forwarded any responses, notify the appropriate ends. */
-    if (notify) {
-
-        /* we have sent stuff to the be, notify it. */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
-            wmb();
-            blk_ptbe_ring->req_prod = ptbe_req_prod;
-
-            notify_via_evtchn(blkif_ptbe_evtchn);
-            DPRINTK(" -- and notified.\n");
-        }
-
-        /* we sent stuff to the app, notify it. */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-
-            blktap_kick_user();
-        }
-    }
-
-    /* unlock rings */
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-    return IRQ_HANDLED;
-}
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-
-    req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    ptbe_req_prod++;
-
-    return 0;
-}
-
-inline void kick_be_domain(void) {
-    wmb();
-    blk_ptbe_ring->req_prod = ptbe_req_prod;
-    notify_via_evtchn(blkif_ptbe_evtchn);
-}
-
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
-                                  struct pt_regs *ptregs)
-{
-    blkif_response_t  *resp_s, *resp_d;
-    BLKIF_RING_IDX be_rp;
-    unsigned long flags;
-    int notify;
-    unsigned long i;
-    active_req_t *ar;
-
-    DPRINTK("PT got BE interrupt.\n");
-
-    /* lock both rings */
-    spin_lock_irqsave(&blkif_io_lock, flags);
-    
-    /* While there are RESPONSES on BERing: */
-    be_rp = blk_ptbe_ring->resp_prod;
-    rmb();
-    notify = (ptbe_resp_cons != be_rp);
-    
-    for ( i = ptbe_resp_cons; i != be_rp; i++ )
-    {
-        /* BE -> FE interposition point is here. */
-        
-        /* Get the next response */
-        resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
-    
-       
-        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
-
-            /* Copy the response message to UBERing */
-            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
-            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
-
-            /* XXX: copy/map the attached page! */
-
-            DPRINTK("rsp->UBERing\n"); 
-            blktap_write_be_ring(resp_s);
-
-        }
-       
-        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
-            
-            /* (fe included to prevent random interference from the BE) */
-            /* Copy the response message to FERing */
-         
-            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
-                    (unsigned) MASK_BLKIF_IDX(i), 
-                    (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
-
-            /* remap id, and free the active req. blkif lookup goes here too.*/
-            ar = &active_reqs[resp_s->id];
-            DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
-            resp_s->id = ar->id;
-            free_active_req(ar);
-           
-            resp_d = &ptfe_blkif.blk_ring_base->ring[
-                MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
-
-            memcpy(resp_d, resp_s, sizeof(blkif_response_t));
-            
-            ptfe_blkif.blk_resp_prod++;
-
-        }
-    }
-
-    ptbe_resp_cons = i;
-    
-    /* If we have forwarded any responses, notify the apropriate domains. */
-    if (notify) {
-
-        /* we have sent stuff to the fe.  notify it. */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
-            wmb();
-            ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
-        
-            notify_via_evtchn(ptfe_blkif.evtchn);
-            DPRINTK(" -- and notified.\n");
-        }
-
-        /* we sent stuff to the app, notify it. */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
-
-            blktap_kick_user();
-        }
-    }
-
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-    return IRQ_HANDLED;
-}
-
-inline int write_resp_to_fe_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *resp_d;
-    active_req_t *ar;
-    
-    /* remap id, and free the active req. blkif lookup goes here too.*/
-    ar = &active_reqs[rsp->id];
-    DPRINTK("%3lu > %3lu\n", rsp->id, ar->id);
-    rsp->id = ar->id;
-    free_active_req(ar);
-            
-    resp_d = &ptfe_blkif.blk_ring_base->ring[
-        MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
-
-    memcpy(resp_d, rsp, sizeof(blkif_response_t));
-    ptfe_blkif.blk_resp_prod++;
-
-    return 0;
-}
-
-inline void kick_fe_domain(void) {
-    wmb();
-    ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
-    notify_via_evtchn(ptfe_blkif.evtchn);
-    
-}
-
-static inline void flush_requests(void)
-{
-    wmb(); /* Ensure that the frontend can see the requests. */
-    blk_ptbe_ring->req_prod = ptbe_req_prod;
-    notify_via_evtchn(blkif_ptbe_evtchn);
-}
-
-/*-----[ Data to/from user space ]----------------------------------------*/
-
-
-int blktap_write_fe_ring(blkif_request_t *req)
-{
-    blkif_request_t *target;
-    int error, i;
-
-    /*
-     * This is called to pass a request from the real frontend domain's
-     * blkif ring to the character device.
-     */
-
-    if ( ! blktap_ring_ok ) {
-        DPRINTK("blktap: fe_ring not ready for a request!\n");
-        return 0;
-    }
-
-    if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
-        DPRINTK("blktap: fe_ring is full, can't add.\n");
-        return 0;
-    }
-
-    target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
-    memcpy(target, req, sizeof(*req));
-
-/* maybe move this stuff out into a seperate func ------------------- */
-
-    /*
-     * For now, map attached page into a fixed position into the vma.
-     * XXX: make this map to a free page.
-     */
-
-    /* Attempt to map the foreign pages directly in to the application */
-    for (i=0; i<target->nr_segments; i++) {
-
-        /* get an unused virtual address from the char device */
-        /* store the old page address */
-        /* replace the address with the virtual address */
-
-        /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */
-
-        error = direct_remap_area_pages(blktap_vma->vm_mm, 
-                                        MMAP_VADDR(req->id, i), 
-                                        target->frame_and_sects[0] & PAGE_MASK,
-                                        PAGE_SIZE,
-                                        blktap_vma->vm_page_prot,
-                                        ptfe_blkif.domid);
-        if ( error != 0 ) {
-            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
-            return 0;
-        }
-    }
-    /* fix the address of the attached page in the message. */
-    /* TODO:      preserve the segment number stuff here... */
-    /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/
-/* ------------------------------------------------------------------ */
-
-    
-    fe_ring.req_prod++;
-
-    return 0;
-}
-
-int blktap_write_be_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *target;
-
-    /*
-     * This is called to pass a request from the real backend domain's
-     * blkif ring to the character device.
-     */
-
-    if ( ! blktap_ring_ok ) {
-        DPRINTK("blktap: be_ring not ready for a request!\n");
-        return 0;
-    }
-
-    if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
-        DPRINTK("blktap: be_ring is full, can't add.\n");
-        return 0;
-    }
-
-    target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
-    memcpy(target, rsp, sizeof(*rsp));
-
-
-    /* XXX: map attached pages and fix-up addresses in the copied address. */
-
-    be_ring.rsp_prod++;
-
-    return 0;
-}
-
-int blktap_read_fe_ring(void)
-{
-    /* This is called to read responses from the UFE ring. */
-
-    BLKIF_RING_IDX fe_rp;
-    unsigned long i;
-    int notify;
-
-    DPRINTK("blktap_read_fe_ring()\n");
-
-    fe_rp = fe_ring.ring->resp_prod;
-    rmb();
-    notify = (fe_rp != fe_ring.rsp_cons);
-
-    /* if we are forwarding from UFERring to FERing */
-    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
-        /* for each outstanding message on the UFEring  */
-        for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) {
-
-            /* XXX: remap pages on that message as necessary */
-            /* copy the message to the UBEring */
-
-            DPRINTK("resp->fe_ring\n");
-            write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp);
-        }
-    
-        fe_ring.rsp_cons = fe_rp;
-
-        /* notify the fe if necessary */
-        if ( notify ) {
-            DPRINTK("kick_fe_domain()\n");
-            kick_fe_domain();
-        }
-    }
-
-    return 0;
-}
-
-int blktap_read_be_ring(void)
-{
-    /* This is called to read responses from the UBE ring. */
-
-    BLKIF_RING_IDX be_rp;
-    unsigned long i;
-    int notify;
-
-    DPRINTK("blktap_read_be_ring()\n");
-
-    be_rp = be_ring.ring->req_prod;
-    rmb();
-    notify = (be_rp != be_ring.req_cons);
-
-    /* if we are forwarding from UFERring to FERing */
-    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
-
-        /* for each outstanding message on the UFEring  */
-        for ( i = be_ring.req_cons; i != be_rp; i++ ) {
-
-            /* XXX: remap pages on that message as necessary */
-            /* copy the message to the UBEring */
-
-            DPRINTK("req->be_ring\n");
-            write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req);
-        }
-    
-        be_ring.req_cons = be_rp;
-
-        /* notify the fe if necessary */
-        if ( notify ) {
-            DPRINTK("kick_be_domain()\n");
-            kick_be_domain();
-        }
-    }
-
-    return 0;
-}
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c
deleted file mode 100644 (file)
index c10e3f3..0000000
+++ /dev/null
@@ -1,243 +0,0 @@
-/******************************************************************************
- * blktap_userdev.c
- * 
- * XenLinux virtual block-device tap.
- * Control interface between the driver and a character device.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
-#include <linux/errno.h>
-#include <linux/major.h>
-#include <linux/gfp.h>
-#include <linux/poll.h>
-#include <asm/pgalloc.h>
-
-#include "blktap.h"
-
-
-unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
-
-/* Only one process may open /dev/xen/blktap at any time. */
-static unsigned long blktap_dev_inuse;
-unsigned long blktap_ring_ok; /* make this ring->state */
-
-/* for poll: */
-static wait_queue_head_t blktap_wait;
-
-/* Where things are inside the device mapping. */
-struct vm_area_struct *blktap_vma;
-unsigned long mmap_vstart;
-unsigned long rings_vstart;
-
-/* -------[ blktap vm ops ]------------------------------------------- */
-
-static struct page *blktap_nopage(struct vm_area_struct *vma,
-                                             unsigned long address,
-                                             int *type)
-{
-    /*
-     * if the page has not been mapped in by the driver then generate
-     * a SIGBUS to the domain.
-     */
-
-    force_sig(SIGBUS, current);
-
-    return 0;
-}
-
-struct vm_operations_struct blktap_vm_ops = {
-    nopage:   blktap_nopage,
-};
-
-/* -------[ blktap file ops ]----------------------------------------- */
-
-static int blktap_open(struct inode *inode, struct file *filp)
-{
-    if ( test_and_set_bit(0, &blktap_dev_inuse) )
-        return -EBUSY;
-
-    printk(KERN_ALERT "blktap open.\n");
-
-    /* Allocate the fe ring. */
-    fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
-    if (fe_ring.ring == NULL)
-        goto fail_nomem;
-
-    SetPageReserved(virt_to_page(fe_ring.ring));
-    
-    fe_ring.ring->req_prod = fe_ring.ring->resp_prod
-                           = fe_ring.req_prod
-                           = fe_ring.rsp_cons
-                           = 0;
-
-    /* Allocate the be ring. */
-    be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
-    if (be_ring.ring == NULL)
-        goto fail_free_fe;
-
-    SetPageReserved(virt_to_page(be_ring.ring));
-    
-    be_ring.ring->req_prod = be_ring.ring->resp_prod
-                           = be_ring.rsp_prod
-                           = be_ring.req_cons
-                           = 0;
-
-    DPRINTK(KERN_ALERT "blktap open.\n");
-
-    return 0;
-
- fail_free_fe:
-    free_page( (unsigned long) fe_ring.ring);
-
- fail_nomem:
-    return -ENOMEM;
-}
-
-static int blktap_release(struct inode *inode, struct file *filp)
-{
-    blktap_dev_inuse = 0;
-    blktap_ring_ok = 0;
-
-    printk(KERN_ALERT "blktap closed.\n");
-
-    /* Free the ring page. */
-    ClearPageReserved(virt_to_page(fe_ring.ring));
-    free_page((unsigned long) fe_ring.ring);
-
-    ClearPageReserved(virt_to_page(be_ring.ring));
-    free_page((unsigned long) be_ring.ring);
-    
-    return 0;
-}
-
-static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-    int size;
-
-    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
-           vma->vm_start, vma->vm_end);
-
-    vma->vm_ops = &blktap_vm_ops;
-
-    size = vma->vm_end - vma->vm_start;
-    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
-        printk(KERN_INFO 
-               "blktap: you _must_ map exactly %d pages!\n",
-               MMAP_PAGES + RING_PAGES);
-        return -EAGAIN;
-    }
-
-    size >>= PAGE_SHIFT;
-    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
-    
-    rings_vstart = vma->vm_start;
-    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
-    
-    /* Map the ring pages to the start of the region and reserve it. */
-
-    /* not sure if I really need to do this... */
-    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-    DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
-    if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, 
-                         vma->vm_page_prot)) {
-        printk(KERN_ERR "be_ring: remap_page_range failure!\n");
-    }
-
-    DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
-    if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
-    }
-
-    blktap_vma = vma;
-    blktap_ring_ok = 1;
-
-    return 0;
-}
-
-static int blktap_ioctl(struct inode *inode, struct file *filp,
-                        unsigned int cmd, unsigned long arg)
-{
-    switch(cmd) {
-    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
-        return blktap_read_fe_ring();
-
-    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
-        return blktap_read_be_ring();
-
-    case BLKTAP_IOCTL_SETMODE:
-        if (BLKTAP_MODE_VALID(arg)) {
-            blktap_mode = arg;
-            /* XXX: may need to flush rings here. */
-            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
-            return 0;
-        }
-        /* XXX: return a more meaningful error case here. */
-    }
-    return -ENOIOCTLCMD;
-}
-
-static unsigned int blktap_poll(struct file *file, poll_table *wait)
-{
-        poll_wait(file, &blktap_wait, wait);
-
-        if ( (fe_ring.req_prod != fe_ring.ring->req_prod) ||
-             (be_ring.rsp_prod != be_ring.ring->resp_prod) ) {
-
-            fe_ring.ring->req_prod = fe_ring.req_prod;
-            be_ring.ring->resp_prod = be_ring.rsp_prod;
-            return POLLIN | POLLRDNORM;
-        }
-
-        return 0;
-}
-
-void blktap_kick_user(void)
-{
-    /* blktap_ring->req_prod = blktap_req_prod; */
-    wake_up_interruptible(&blktap_wait);
-}
-
-static struct file_operations blktap_fops = {
-    owner:    THIS_MODULE,
-    poll:     blktap_poll,
-    ioctl:    blktap_ioctl,
-    open:     blktap_open,
-    release:  blktap_release,
-    mmap:     blktap_mmap,
-};
-
-/* -------[ blktap module setup ]------------------------------------- */
-
-static struct miscdevice blktap_miscdev = {
-    .minor        = BLKTAP_MINOR,
-    .name         = "blktap",
-    .fops         = &blktap_fops,
-    .devfs_name   = "misc/blktap",
-};
-
-int blktap_init(void)
-{
-    int err;
-
-    err = misc_register(&blktap_miscdev);
-    if ( err != 0 )
-    {
-        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
-        return err;
-    }
-
-    init_waitqueue_head(&blktap_wait);
-
-
-    return 0;
-}
diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h
deleted file mode 100644 (file)
index b5c5362..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <linux/config.h>
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-       unsigned int __softirq_pending;
-       unsigned long idle_timestamp;
-       unsigned int __nmi_count;       /* arch dependent */
-       unsigned int apic_timer_irqs;   /* arch dependent */
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- * - bits 16-24 are the hardirq count (max # of hardirqs: 512)
- *
- * - ( bit 26 is the PREEMPT_ACTIVE flag. )
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x01ff0000
- */
-
-#define PREEMPT_BITS   8
-#define SOFTIRQ_BITS   8
-#define HARDIRQ_BITS   9
-
-#define PREEMPT_SHIFT  0
-#define SOFTIRQ_SHIFT  (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT  (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-#define nmi_enter()            (irq_enter())
-#define nmi_exit()             (preempt_count() -= HARDIRQ_OFFSET)
-
-#define irq_enter()            (preempt_count() += HARDIRQ_OFFSET)
-#define irq_exit()                                                     \
-do {                                                                   \
-               preempt_count() -= IRQ_EXIT_OFFSET;                     \
-               if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-                       do_softirq();                                   \
-               preempt_enable_no_resched();                            \
-} while (0)
-
-#endif /* __ASM_HARDIRQ_H */
diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
deleted file mode 100644 (file)
index 421a81f..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#if 1
-       printk("smpboot_clear_io_apic_irqs\n");
-#else
-       io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-#if 1
-       printk("smpboot_setup_warm_reset_vector\n");
-#else
-       CMOS_WRITE(0xa, 0xf);
-       local_flush_tlb();
-       Dprintk("1.\n");
-       *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
-       Dprintk("2.\n");
-       *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
-       Dprintk("3.\n");
-#endif
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
-       /*
-        * Install writable page 0 entry to set BIOS data area.
-        */
-       local_flush_tlb();
-
-       /*
-        * Paranoid:  Set warm reset code and vector here back
-        * to default values.
-        */
-       CMOS_WRITE(0, 0xf);
-
-       *((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void smpboot_setup_io_apic(void)
-{
-#if 1
-       printk("smpboot_setup_io_apic\n");
-#else
-       /*
-        * Here we can be sure that there is an IO-APIC in the system. Let's
-        * go and set it up:
-        */
-       if (!skip_ioapic_setup && nr_ioapics)
-               setup_IO_APIC();
-#endif
-}
-
-
-#define        smp_found_config        (HYPERVISOR_shared_info->n_vcpu > 1)
diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h
deleted file mode 100644 (file)
index fb8bd00..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <linux/config.h>
-#include <linux/compiler.h>
-
-asmlinkage int printk(const char * fmt, ...)
-       __attribute__ ((format (printf, 1, 2)));
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-typedef struct {
-       volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       unsigned magic;
-#endif
-} spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
-#else
-#define SPINLOCK_MAGIC_INIT    /* */
-#endif
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
-
-#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
-
-/*
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- */
-
-#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->lock) <= 0)
-#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
-
-#define spin_lock_string \
-       "\n1:\t" \
-       "lock ; decb %0\n\t" \
-       "jns 3f\n" \
-       "2:\t" \
-       "rep;nop\n\t" \
-       "cmpb $0,%0\n\t" \
-       "jle 2b\n\t" \
-       "jmp 1b\n" \
-       "3:\n\t"
-
-#define spin_lock_string_flags \
-       "\n1:\t" \
-       "lock ; decb %0\n\t" \
-       "jns 4f\n\t" \
-       "2:\t" \
-       "testl $0x200, %1\n\t" \
-       "jz 3f\n\t" \
-       "#sti\n\t" \
-       "3:\t" \
-       "rep;nop\n\t" \
-       "cmpb $0, %0\n\t" \
-       "jle 3b\n\t" \
-       "#cli\n\t" \
-       "jmp 1b\n" \
-       "4:\n\t"
-
-/*
- * This works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE)
- * (PPro errata 66, 92)
- */
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define spin_unlock_string \
-       "movb $1,%0" \
-               :"=m" (lock->lock) : : "memory"
-
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(lock->magic != SPINLOCK_MAGIC);
-       BUG_ON(!spin_is_locked(lock));
-#endif
-       __asm__ __volatile__(
-               spin_unlock_string
-       );
-}
-
-#else
-
-#define spin_unlock_string \
-       "xchgb %b0, %1" \
-               :"=q" (oldval), "=m" (lock->lock) \
-               :"0" (oldval) : "memory"
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
-       char oldval = 1;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(lock->magic != SPINLOCK_MAGIC);
-       BUG_ON(!spin_is_locked(lock));
-#endif
-       __asm__ __volatile__(
-               spin_unlock_string
-       );
-}
-
-#endif
-
-static inline int _raw_spin_trylock(spinlock_t *lock)
-{
-       char oldval;
-       __asm__ __volatile__(
-               "xchgb %b0,%1"
-               :"=q" (oldval), "=m" (lock->lock)
-               :"0" (0) : "memory");
-       return oldval > 0;
-}
-
-static inline void _raw_spin_lock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-               printk("eip: %p\n", __builtin_return_address(0));
-               BUG();
-       }
-#endif
-       __asm__ __volatile__(
-               spin_lock_string
-               :"=m" (lock->lock) : : "memory");
-}
-
-static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-               printk("eip: %p\n", __builtin_return_address(0));
-               BUG();
-       }
-#endif
-       __asm__ __volatile__(
-               spin_lock_string_flags
-               :"=m" (lock->lock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-       volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       unsigned magic;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC   0xdeaf1eed
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
-#else
-#define RWLOCK_MAGIC_INIT      /* */
-#endif
-
-#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
-
-#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
-
-#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
-
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores.  See
- * semaphore.h for details.  -ben
- */
-/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
-
-static inline void _raw_read_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
-       __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void _raw_write_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
-       __build_write_lock(rw, "__write_lock_failed");
-}
-
-#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-       atomic_t *count = (atomic_t *)lock;
-       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-               return 1;
-       atomic_add(RW_LOCK_BIAS, count);
-       return 0;
-}
-
-#endif /* __ASM_SPINLOCK_H */